mirror of
https://github.com/beetbox/beets.git
synced 2026-02-19 22:03:05 +01:00
Add migration for multi-value genres field
* Move genre-to-genres migration into a dedicated Migration class and wire it into Library._migrations for items and albums. * Add batched SQL updates via mutate_many and share the multi-value delimiter as a constant. * Cover migration behavior with new tests. I initially attempted to migrate using our model infrastructure / Model.store(), see the comparison below: Durations migrating my library of ~9000 items and ~2300 albums: 1. Using our Python logic: 11 minutes 2. Using SQL directly: 4 seconds That's why I've gone ahead with option 2.
This commit is contained in:
parent
79167f1df7
commit
8bddfb2421
7 changed files with 179 additions and 109 deletions
|
|
@ -1023,6 +1023,29 @@ class Transaction:
|
|||
self._mutated = True
|
||||
return cursor.lastrowid
|
||||
|
||||
def mutate_many(
|
||||
self, statement: str, subvals: Sequence[tuple[SQLiteType, ...]] = ()
|
||||
) -> Any:
|
||||
"""Execute an SQL statement with substitution values and return
|
||||
the row ID of the last affected row.
|
||||
"""
|
||||
try:
|
||||
cursor = self.db._connection().executemany(statement, subvals)
|
||||
except sqlite3.OperationalError as e:
|
||||
# In two specific cases, SQLite reports an error while accessing
|
||||
# the underlying database file. We surface these exceptions as
|
||||
# DBAccessError so the application can abort.
|
||||
if e.args[0] in (
|
||||
"attempt to write a readonly database",
|
||||
"unable to open database file",
|
||||
):
|
||||
raise DBAccessError(e.args[0])
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
self._mutated = True
|
||||
return cursor.lastrowid
|
||||
|
||||
def script(self, statements: str):
|
||||
"""Execute a string containing multiple SQL statements."""
|
||||
# We don't know whether this mutates, but quite likely it does.
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ from . import query
|
|||
|
||||
SQLiteType = query.SQLiteType
|
||||
BLOB_TYPE = query.BLOB_TYPE
|
||||
MULTI_VALUE_DELIMITER = "\\␀"
|
||||
|
||||
|
||||
class ModelType(typing.Protocol):
|
||||
|
|
@ -467,4 +468,4 @@ DATE = DateType()
|
|||
SEMICOLON_SPACE_DSV = DelimitedString(delimiter="; ")
|
||||
|
||||
# Will set the proper null char in mediafile
|
||||
MULTI_VALUE_DSV = DelimitedString(delimiter="\\␀")
|
||||
MULTI_VALUE_DSV = DelimitedString(delimiter=MULTI_VALUE_DELIMITER)
|
||||
|
|
|
|||
|
|
@ -5,17 +5,16 @@ from typing import TYPE_CHECKING
|
|||
import platformdirs
|
||||
|
||||
import beets
|
||||
from beets import dbcore, logging, ui
|
||||
from beets.autotag import correct_list_fields
|
||||
from beets import dbcore, logging
|
||||
from beets.util import normpath
|
||||
|
||||
from .migrations import MultiGenreFieldMigration
|
||||
from .models import Album, Item
|
||||
from .queries import PF_KEY_DEFAULT, parse_query_parts, parse_query_string
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
from beets.dbcore import Results
|
||||
|
||||
from beets.dbcore import Results, types
|
||||
|
||||
log = logging.getLogger("beets")
|
||||
|
||||
|
|
@ -24,6 +23,7 @@ class Library(dbcore.Database):
|
|||
"""A database of music containing songs and albums."""
|
||||
|
||||
_models = (Item, Album)
|
||||
_migrations = ((MultiGenreFieldMigration, (Item, Album)),)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -147,107 +147,3 @@ class Library(dbcore.Database):
|
|||
item_or_id if isinstance(item_or_id, int) else item_or_id.album_id
|
||||
)
|
||||
return self._get(Album, album_id) if album_id else None
|
||||
|
||||
# Database schema migration.
|
||||
|
||||
def _make_table(self, table: str, fields: Mapping[str, types.Type]):
|
||||
"""Set up the schema of the database, and migrate genres if needed."""
|
||||
with self.transaction() as tx:
|
||||
rows = tx.query(f"PRAGMA table_info({table})")
|
||||
current_fields = {row[1] for row in rows}
|
||||
field_names = set(fields.keys())
|
||||
|
||||
# Check if genres column is being added to items table
|
||||
genres_being_added = (
|
||||
table == "items"
|
||||
and "genres" in field_names
|
||||
and "genres" not in current_fields
|
||||
and "genre" in current_fields
|
||||
)
|
||||
|
||||
# Call parent to create/update table
|
||||
super()._make_table(table, fields)
|
||||
|
||||
# Migrate genre to genres if genres column was just added
|
||||
if genres_being_added:
|
||||
self._migrate_genre_to_genres()
|
||||
|
||||
def _migrate_genre_to_genres(self):
|
||||
"""Migrate comma-separated genre strings to genres list.
|
||||
|
||||
This migration runs automatically when the genres column is first
|
||||
created in the database. It splits comma-separated genre values
|
||||
and writes the changes to both the database and media files.
|
||||
"""
|
||||
items = list(self.items())
|
||||
migrated_count = 0
|
||||
total_items = len(items)
|
||||
|
||||
if total_items == 0:
|
||||
return
|
||||
|
||||
ui.print_(f"Migrating genres for {total_items} items...")
|
||||
|
||||
for index, item in enumerate(items, 1):
|
||||
genre_val = item.genre or ""
|
||||
genres_val = item.genres or []
|
||||
|
||||
# Check if migration is needed
|
||||
needs_migration = False
|
||||
split_genres = []
|
||||
if not genres_val and genre_val:
|
||||
separators = []
|
||||
if (
|
||||
"lastgenre" in beets.config
|
||||
and "separator" in beets.config["lastgenre"]
|
||||
):
|
||||
try:
|
||||
user_sep = beets.config["lastgenre"][
|
||||
"separator"
|
||||
].as_str()
|
||||
if user_sep:
|
||||
separators.append(user_sep)
|
||||
except (
|
||||
beets.config.ConfigNotFoundError,
|
||||
beets.config.ConfigTypeError,
|
||||
):
|
||||
pass
|
||||
|
||||
separators.extend([", ", "; ", " / "])
|
||||
|
||||
for separator in separators:
|
||||
if separator in genre_val:
|
||||
split_genres = [
|
||||
g.strip()
|
||||
for g in genre_val.split(separator)
|
||||
if g.strip()
|
||||
]
|
||||
if len(split_genres) > 1:
|
||||
needs_migration = True
|
||||
break
|
||||
|
||||
if needs_migration:
|
||||
migrated_count += 1
|
||||
# Show progress every 100 items
|
||||
if migrated_count % 100 == 0:
|
||||
ui.print_(
|
||||
f" Migrated {migrated_count} items "
|
||||
f"({index}/{total_items} processed)..."
|
||||
)
|
||||
# Migrate using the same logic as correct_list_fields
|
||||
correct_list_fields(item)
|
||||
item.store()
|
||||
# Write to media file
|
||||
try:
|
||||
item.try_write()
|
||||
except Exception as e:
|
||||
log.warning(
|
||||
"Could not write genres to {}: {}",
|
||||
item.path,
|
||||
e,
|
||||
)
|
||||
|
||||
ui.print_(
|
||||
f"Migration complete: {migrated_count} of {total_items} items "
|
||||
f"updated with comma-separated genres"
|
||||
)
|
||||
|
|
|
|||
94
beets/library/migrations.py
Normal file
94
beets/library/migrations.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager, suppress
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING, NamedTuple, TypeVar
|
||||
|
||||
from confuse.exceptions import ConfigError
|
||||
|
||||
import beets
|
||||
from beets import ui
|
||||
from beets.dbcore.db import Migration
|
||||
from beets.dbcore.types import MULTI_VALUE_DELIMITER
|
||||
from beets.util import unique_list
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterator
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class GenreRow(NamedTuple):
|
||||
id: int
|
||||
genre: str
|
||||
genres: str | None
|
||||
|
||||
|
||||
def chunks(lst: list[T], n: int) -> Iterator[list[T]]:
|
||||
"""Yield successive n-sized chunks from lst."""
|
||||
for i in range(0, len(lst), n):
|
||||
yield lst[i : i + n]
|
||||
|
||||
|
||||
class MultiGenreFieldMigration(Migration):
|
||||
@cached_property
|
||||
def separators(self) -> list[str]:
|
||||
separators = []
|
||||
with suppress(ConfigError):
|
||||
separators.append(beets.config["lastgenre"]["separator"].as_str())
|
||||
|
||||
separators.extend([", ", "; ", " / "])
|
||||
return unique_list(filter(None, separators))
|
||||
|
||||
@contextmanager
|
||||
def with_factory(self, factory: type[NamedTuple]) -> Iterator[None]:
|
||||
"""Temporarily set the row factory to a specific type."""
|
||||
original_factory = self.db._connection().row_factory
|
||||
self.db._connection().row_factory = lambda _, row: factory(*row)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.db._connection().row_factory = original_factory
|
||||
|
||||
def get_genres(self, genre: str) -> str:
|
||||
for separator in self.separators:
|
||||
if separator in genre:
|
||||
return genre.replace(separator, MULTI_VALUE_DELIMITER)
|
||||
|
||||
return genre
|
||||
|
||||
def _migrate_data(self, table: str) -> None:
|
||||
"""Migrate legacy genre values to the multi-value genres field."""
|
||||
|
||||
with self.db.transaction() as tx, self.with_factory(GenreRow):
|
||||
rows: list[GenreRow] = tx.query( # type: ignore[assignment]
|
||||
f"""
|
||||
SELECT id, genre, genres
|
||||
FROM {table}
|
||||
WHERE genre IS NOT NULL AND genre != ''
|
||||
"""
|
||||
)
|
||||
|
||||
total = len(rows)
|
||||
to_migrate = [e for e in rows if not e.genres]
|
||||
if not to_migrate:
|
||||
return
|
||||
|
||||
migrated = total - len(to_migrate)
|
||||
|
||||
ui.print_(f"Migrating genres for {total} {table}...")
|
||||
for batch in chunks(to_migrate, 100):
|
||||
with self.db.transaction() as tx:
|
||||
tx.mutate_many(
|
||||
f"UPDATE {table} SET genres = ? WHERE id = ?",
|
||||
[(self.get_genres(e.genre), e.id) for e in batch],
|
||||
)
|
||||
|
||||
migrated += len(batch)
|
||||
|
||||
ui.print_(
|
||||
f" Migrated {migrated} {table} "
|
||||
f"({migrated}/{total} processed)..."
|
||||
)
|
||||
|
||||
ui.print_(f"Migration complete: {migrated} of {total} {table} updated")
|
||||
|
|
@ -156,6 +156,8 @@ class TestHelper(RunMixin, ConfigMixin):
|
|||
fixtures.
|
||||
"""
|
||||
|
||||
lib: Library
|
||||
|
||||
resource_path = Path(os.fsdecode(_common.RSRC)) / "full.mp3"
|
||||
|
||||
db_on_disk: ClassVar[bool] = False
|
||||
|
|
|
|||
0
test/library/__init__.py
Normal file
0
test/library/__init__.py
Normal file
54
test/library/test_migrations.py
Normal file
54
test/library/test_migrations.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
import pytest
|
||||
|
||||
from beets.library.migrations import MultiGenreFieldMigration
|
||||
from beets.library.models import Album, Item
|
||||
from beets.test.helper import TestHelper
|
||||
|
||||
|
||||
class TestMultiGenreFieldMigration:
|
||||
@pytest.fixture
|
||||
def helper(self, monkeypatch):
|
||||
monkeypatch.setattr("beets.library.library.Library._migrations", ())
|
||||
helper = TestHelper()
|
||||
helper.setup_beets()
|
||||
|
||||
monkeypatch.setattr(
|
||||
"beets.library.library.Library._migrations",
|
||||
((MultiGenreFieldMigration, (Item, Album)),),
|
||||
)
|
||||
yield helper
|
||||
|
||||
helper.teardown_beets()
|
||||
|
||||
def test_migrates_only_rows_with_missing_genres(self, helper: TestHelper):
|
||||
helper.config["lastgenre"]["separator"] = " - "
|
||||
|
||||
expected_item_genres = []
|
||||
for genre, initial_genres, expected_genres in [
|
||||
# already existing value is not overwritten
|
||||
("Item Rock", ("Ignored",), ("Ignored",)),
|
||||
("", (), ()),
|
||||
("Rock", (), ("Rock",)),
|
||||
# multiple genres are split on one of default separators
|
||||
("Item Rock; Alternative", (), ("Item Rock", "Alternative")),
|
||||
# multiple genres are split the first (lastgenre) separator ONLY
|
||||
("Item - Rock, Alternative", (), ("Item", "Rock, Alternative")),
|
||||
]:
|
||||
helper.add_item(genre=genre, genres=initial_genres)
|
||||
expected_item_genres.append(expected_genres)
|
||||
|
||||
unmigrated_album = helper.add_album(
|
||||
genre="Album Rock / Alternative", genres=[]
|
||||
)
|
||||
expected_item_genres.append(("Album Rock", "Alternative"))
|
||||
|
||||
helper.lib._migrate()
|
||||
|
||||
actual_item_genres = [tuple(i.genres) for i in helper.lib.items()]
|
||||
assert actual_item_genres == expected_item_genres
|
||||
|
||||
unmigrated_album.load()
|
||||
assert unmigrated_album.genres == ["Album Rock", "Alternative"]
|
||||
|
||||
assert helper.lib.get_migration_state("multi_genre_field_items")
|
||||
assert helper.lib.get_migration_state("multi_genre_field_albums")
|
||||
Loading…
Reference in a new issue