From 8bddfb24215bac5b06f05a653524d80b140b9aa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 8 Feb 2026 21:28:13 +0000 Subject: [PATCH] Add migration for multi-value genres field * Move genre-to-genres migration into a dedicated Migration class and wire it into Library._migrations for items and albums. * Add batched SQL updates via mutate_many and share the multi-value delimiter as a constant. * Cover migration behavior with new tests. I initially attempted to migrate using our model infrastructure / Model.store(), see the comparison below: Durations migrating my library of ~9000 items and ~2300 albums: 1. Using our Python logic: 11 minutes 2. Using SQL directly: 4 seconds That's why I've gone ahead with option 2. --- beets/dbcore/db.py | 23 +++++++ beets/dbcore/types.py | 3 +- beets/library/library.py | 112 ++------------------------------ beets/library/migrations.py | 94 +++++++++++++++++++++++++++ beets/test/helper.py | 2 + test/library/__init__.py | 0 test/library/test_migrations.py | 54 +++++++++++++++ 7 files changed, 179 insertions(+), 109 deletions(-) create mode 100644 beets/library/migrations.py create mode 100644 test/library/__init__.py create mode 100644 test/library/test_migrations.py diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 613430651..20facc7a0 100755 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -1023,6 +1023,29 @@ class Transaction: self._mutated = True return cursor.lastrowid + def mutate_many( + self, statement: str, subvals: Sequence[tuple[SQLiteType, ...]] = () + ) -> Any: + """Execute an SQL statement with substitution values and return + the row ID of the last affected row. + """ + try: + cursor = self.db._connection().executemany(statement, subvals) + except sqlite3.OperationalError as e: + # In two specific cases, SQLite reports an error while accessing + # the underlying database file. We surface these exceptions as + # DBAccessError so the application can abort. + if e.args[0] in ( + "attempt to write a readonly database", + "unable to open database file", + ): + raise DBAccessError(e.args[0]) + else: + raise + else: + self._mutated = True + return cursor.lastrowid + def script(self, statements: str): """Execute a string containing multiple SQL statements.""" # We don't know whether this mutates, but quite likely it does. diff --git a/beets/dbcore/types.py b/beets/dbcore/types.py index 61336d9ce..92c2160d4 100644 --- a/beets/dbcore/types.py +++ b/beets/dbcore/types.py @@ -30,6 +30,7 @@ from . import query SQLiteType = query.SQLiteType BLOB_TYPE = query.BLOB_TYPE +MULTI_VALUE_DELIMITER = "\\␀" class ModelType(typing.Protocol): @@ -467,4 +468,4 @@ DATE = DateType() SEMICOLON_SPACE_DSV = DelimitedString(delimiter="; ") # Will set the proper null char in mediafile -MULTI_VALUE_DSV = DelimitedString(delimiter="\\␀") +MULTI_VALUE_DSV = DelimitedString(delimiter=MULTI_VALUE_DELIMITER) diff --git a/beets/library/library.py b/beets/library/library.py index 16a8fcf93..263c1a9ab 100644 --- a/beets/library/library.py +++ b/beets/library/library.py @@ -5,17 +5,16 @@ from typing import TYPE_CHECKING import platformdirs import beets -from beets import dbcore, logging, ui -from beets.autotag import correct_list_fields +from beets import dbcore, logging from beets.util import normpath +from .migrations import MultiGenreFieldMigration from .models import Album, Item from .queries import PF_KEY_DEFAULT, parse_query_parts, parse_query_string if TYPE_CHECKING: - from collections.abc import Mapping + from beets.dbcore import Results - from beets.dbcore import Results, types log = logging.getLogger("beets") @@ -24,6 +23,7 @@ class Library(dbcore.Database): """A database of music containing songs and albums.""" _models = (Item, Album) + _migrations = ((MultiGenreFieldMigration, (Item, Album)),) def __init__( self, @@ -147,107 +147,3 @@ class Library(dbcore.Database): item_or_id if isinstance(item_or_id, int) else item_or_id.album_id ) return self._get(Album, album_id) if album_id else None - - # Database schema migration. - - def _make_table(self, table: str, fields: Mapping[str, types.Type]): - """Set up the schema of the database, and migrate genres if needed.""" - with self.transaction() as tx: - rows = tx.query(f"PRAGMA table_info({table})") - current_fields = {row[1] for row in rows} - field_names = set(fields.keys()) - - # Check if genres column is being added to items table - genres_being_added = ( - table == "items" - and "genres" in field_names - and "genres" not in current_fields - and "genre" in current_fields - ) - - # Call parent to create/update table - super()._make_table(table, fields) - - # Migrate genre to genres if genres column was just added - if genres_being_added: - self._migrate_genre_to_genres() - - def _migrate_genre_to_genres(self): - """Migrate comma-separated genre strings to genres list. - - This migration runs automatically when the genres column is first - created in the database. It splits comma-separated genre values - and writes the changes to both the database and media files. - """ - items = list(self.items()) - migrated_count = 0 - total_items = len(items) - - if total_items == 0: - return - - ui.print_(f"Migrating genres for {total_items} items...") - - for index, item in enumerate(items, 1): - genre_val = item.genre or "" - genres_val = item.genres or [] - - # Check if migration is needed - needs_migration = False - split_genres = [] - if not genres_val and genre_val: - separators = [] - if ( - "lastgenre" in beets.config - and "separator" in beets.config["lastgenre"] - ): - try: - user_sep = beets.config["lastgenre"][ - "separator" - ].as_str() - if user_sep: - separators.append(user_sep) - except ( - beets.config.ConfigNotFoundError, - beets.config.ConfigTypeError, - ): - pass - - separators.extend([", ", "; ", " / "]) - - for separator in separators: - if separator in genre_val: - split_genres = [ - g.strip() - for g in genre_val.split(separator) - if g.strip() - ] - if len(split_genres) > 1: - needs_migration = True - break - - if needs_migration: - migrated_count += 1 - # Show progress every 100 items - if migrated_count % 100 == 0: - ui.print_( - f" Migrated {migrated_count} items " - f"({index}/{total_items} processed)..." - ) - # Migrate using the same logic as correct_list_fields - correct_list_fields(item) - item.store() - # Write to media file - try: - item.try_write() - except Exception as e: - log.warning( - "Could not write genres to {}: {}", - item.path, - e, - ) - - ui.print_( - f"Migration complete: {migrated_count} of {total_items} items " - f"updated with comma-separated genres" - ) diff --git a/beets/library/migrations.py b/beets/library/migrations.py new file mode 100644 index 000000000..c9daa433b --- /dev/null +++ b/beets/library/migrations.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from contextlib import contextmanager, suppress +from functools import cached_property +from typing import TYPE_CHECKING, NamedTuple, TypeVar + +from confuse.exceptions import ConfigError + +import beets +from beets import ui +from beets.dbcore.db import Migration +from beets.dbcore.types import MULTI_VALUE_DELIMITER +from beets.util import unique_list + +if TYPE_CHECKING: + from collections.abc import Iterator + +T = TypeVar("T") + + +class GenreRow(NamedTuple): + id: int + genre: str + genres: str | None + + +def chunks(lst: list[T], n: int) -> Iterator[list[T]]: + """Yield successive n-sized chunks from lst.""" + for i in range(0, len(lst), n): + yield lst[i : i + n] + + +class MultiGenreFieldMigration(Migration): + @cached_property + def separators(self) -> list[str]: + separators = [] + with suppress(ConfigError): + separators.append(beets.config["lastgenre"]["separator"].as_str()) + + separators.extend([", ", "; ", " / "]) + return unique_list(filter(None, separators)) + + @contextmanager + def with_factory(self, factory: type[NamedTuple]) -> Iterator[None]: + """Temporarily set the row factory to a specific type.""" + original_factory = self.db._connection().row_factory + self.db._connection().row_factory = lambda _, row: factory(*row) + try: + yield + finally: + self.db._connection().row_factory = original_factory + + def get_genres(self, genre: str) -> str: + for separator in self.separators: + if separator in genre: + return genre.replace(separator, MULTI_VALUE_DELIMITER) + + return genre + + def _migrate_data(self, table: str) -> None: + """Migrate legacy genre values to the multi-value genres field.""" + + with self.db.transaction() as tx, self.with_factory(GenreRow): + rows: list[GenreRow] = tx.query( # type: ignore[assignment] + f""" + SELECT id, genre, genres + FROM {table} + WHERE genre IS NOT NULL AND genre != '' + """ + ) + + total = len(rows) + to_migrate = [e for e in rows if not e.genres] + if not to_migrate: + return + + migrated = total - len(to_migrate) + + ui.print_(f"Migrating genres for {total} {table}...") + for batch in chunks(to_migrate, 100): + with self.db.transaction() as tx: + tx.mutate_many( + f"UPDATE {table} SET genres = ? WHERE id = ?", + [(self.get_genres(e.genre), e.id) for e in batch], + ) + + migrated += len(batch) + + ui.print_( + f" Migrated {migrated} {table} " + f"({migrated}/{total} processed)..." + ) + + ui.print_(f"Migration complete: {migrated} of {total} {table} updated") diff --git a/beets/test/helper.py b/beets/test/helper.py index 7762ab866..218b778c7 100644 --- a/beets/test/helper.py +++ b/beets/test/helper.py @@ -156,6 +156,8 @@ class TestHelper(RunMixin, ConfigMixin): fixtures. """ + lib: Library + resource_path = Path(os.fsdecode(_common.RSRC)) / "full.mp3" db_on_disk: ClassVar[bool] = False diff --git a/test/library/__init__.py b/test/library/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/library/test_migrations.py b/test/library/test_migrations.py new file mode 100644 index 000000000..3a37232fd --- /dev/null +++ b/test/library/test_migrations.py @@ -0,0 +1,54 @@ +import pytest + +from beets.library.migrations import MultiGenreFieldMigration +from beets.library.models import Album, Item +from beets.test.helper import TestHelper + + +class TestMultiGenreFieldMigration: + @pytest.fixture + def helper(self, monkeypatch): + monkeypatch.setattr("beets.library.library.Library._migrations", ()) + helper = TestHelper() + helper.setup_beets() + + monkeypatch.setattr( + "beets.library.library.Library._migrations", + ((MultiGenreFieldMigration, (Item, Album)),), + ) + yield helper + + helper.teardown_beets() + + def test_migrates_only_rows_with_missing_genres(self, helper: TestHelper): + helper.config["lastgenre"]["separator"] = " - " + + expected_item_genres = [] + for genre, initial_genres, expected_genres in [ + # already existing value is not overwritten + ("Item Rock", ("Ignored",), ("Ignored",)), + ("", (), ()), + ("Rock", (), ("Rock",)), + # multiple genres are split on one of default separators + ("Item Rock; Alternative", (), ("Item Rock", "Alternative")), + # multiple genres are split the first (lastgenre) separator ONLY + ("Item - Rock, Alternative", (), ("Item", "Rock, Alternative")), + ]: + helper.add_item(genre=genre, genres=initial_genres) + expected_item_genres.append(expected_genres) + + unmigrated_album = helper.add_album( + genre="Album Rock / Alternative", genres=[] + ) + expected_item_genres.append(("Album Rock", "Alternative")) + + helper.lib._migrate() + + actual_item_genres = [tuple(i.genres) for i in helper.lib.items()] + assert actual_item_genres == expected_item_genres + + unmigrated_album.load() + assert unmigrated_album.genres == ["Album Rock", "Alternative"] + + assert helper.lib.get_migration_state("multi_genre_field_items") + assert helper.lib.get_migration_state("multi_genre_field_albums")