Implement automatic database-level genre migration

- Add Library._make_table() override to automatically migrate genres when database schema is updated
- Migration splits comma/semicolon/slash-separated genre strings into genres list
- Writes changes to both database and media files with progress reporting
- Remove lazy migration from correct_list_fields() - now handled at database level
- Remove migration-specific tests (migration is now automatic, not lazy)
- Update changelog to reflect automatic migration behavior

Related PR review comment changes:
- Replace _is_valid with _filter_valid method in lastgenre plugin
- Use unique_list and remove genre field from Beatport plugin
- Simplify LastGenre tests - remove separator logic
- Document separator deprecation in lastgenre plugin
- Add deprecation warning for genre parameter in Info.__init__()
This commit is contained in:
dunkla 2025-12-28 20:12:15 +01:00
parent d565524965
commit 9fc90dd8f6
10 changed files with 164 additions and 173 deletions

View file

@ -166,37 +166,7 @@ def correct_list_fields(m: LibModel) -> None:
elif list_val:
setattr(m, single_field, list_val[0])
def migrate_legacy_genres() -> None:
"""Migrate comma-separated genre strings to genres list.
For users upgrading from previous versions, their genre field may
contain comma-separated values (e.g., "Rock, Alternative, Indie").
This migration splits those values into the genres list on first access,
avoiding the need to reimport the entire library.
"""
genre_val = getattr(m, "genre", "")
genres_val = getattr(m, "genres", [])
# Only migrate if genres list is empty and genre contains separators
if not genres_val and genre_val:
# Try common separators used by lastgenre and other tools
for separator in [", ", "; ", " / "]:
if separator in genre_val:
# Split and clean the genre string
split_genres = [
g.strip()
for g in genre_val.split(separator)
if g.strip()
]
if len(split_genres) > 1:
# Found a valid split - populate genres list
setattr(m, "genres", split_genres)
# Clear genre so ensure_first_value sets it correctly
setattr(m, "genre", "")
break
ensure_first_value("albumtype", "albumtypes")
migrate_legacy_genres()
ensure_first_value("genre", "genres")
if hasattr(m, "mb_artistids"):

View file

@ -16,6 +16,7 @@
from __future__ import annotations
import warnings
from copy import deepcopy
from dataclasses import dataclass
from functools import cached_property
@ -80,6 +81,26 @@ class Info(AttrDict[Any]):
media: str | None = None,
**kwargs,
) -> None:
if genre:
warnings.warn(
"The 'genre' parameter is deprecated. Use 'genres' (list) instead.",
DeprecationWarning,
stacklevel=2,
)
if not genres:
for separator in [", ", "; ", " / "]:
if separator in genre:
split_genres = [
g.strip()
for g in genre.split(separator)
if g.strip()
]
if len(split_genres) > 1:
genres = split_genres
break
if not genres:
genres = [genre]
self.album = album
self.artist = artist
self.artist_credit = artist_credit
@ -91,7 +112,7 @@ class Info(AttrDict[Any]):
self.artists_sort = artists_sort or []
self.data_source = data_source
self.data_url = data_url
self.genre = genre
self.genre = None
self.genres = genres or []
self.media = media
self.update(kwargs)

View file

@ -5,14 +5,19 @@ from typing import TYPE_CHECKING
import platformdirs
import beets
from beets import dbcore
from beets import dbcore, logging, ui
from beets.autotag import correct_list_fields
from beets.util import normpath
from .models import Album, Item
from .queries import PF_KEY_DEFAULT, parse_query_parts, parse_query_string
if TYPE_CHECKING:
from beets.dbcore import Results
from collections.abc import Mapping
from beets.dbcore import Results, types
log = logging.getLogger("beets")
class Library(dbcore.Database):
@ -142,3 +147,88 @@ class Library(dbcore.Database):
item_or_id if isinstance(item_or_id, int) else item_or_id.album_id
)
return self._get(Album, album_id) if album_id else None
# Database schema migration.
def _make_table(self, table: str, fields: Mapping[str, types.Type]):
"""Set up the schema of the database, and migrate genres if needed."""
with self.transaction() as tx:
rows = tx.query(f"PRAGMA table_info({table})")
current_fields = {row[1] for row in rows}
field_names = set(fields.keys())
# Check if genres column is being added to items table
genres_being_added = (
table == "items"
and "genres" in field_names
and "genres" not in current_fields
and "genre" in current_fields
)
# Call parent to create/update table
super()._make_table(table, fields)
# Migrate genre to genres if genres column was just added
if genres_being_added:
self._migrate_genre_to_genres()
def _migrate_genre_to_genres(self):
"""Migrate comma-separated genre strings to genres list.
This migration runs automatically when the genres column is first
created in the database. It splits comma-separated genre values
and writes the changes to both the database and media files.
"""
items = list(self.items())
migrated_count = 0
total_items = len(items)
if total_items == 0:
return
ui.print_(f"Migrating genres for {total_items} items...")
for index, item in enumerate(items, 1):
genre_val = item.genre or ""
genres_val = item.genres or []
# Check if migration is needed
needs_migration = False
split_genres = []
if not genres_val and genre_val:
for separator in [", ", "; ", " / "]:
if separator in genre_val:
split_genres = [
g.strip()
for g in genre_val.split(separator)
if g.strip()
]
if len(split_genres) > 1:
needs_migration = True
break
if needs_migration:
migrated_count += 1
# Show progress every 100 items
if migrated_count % 100 == 0:
ui.print_(
f" Migrated {migrated_count} items "
f"({index}/{total_items} processed)..."
)
# Migrate using the same logic as correct_list_fields
correct_list_fields(item)
item.store()
# Write to media file
try:
item.try_write()
except Exception as e:
log.warning(
"Could not write genres to {}: {}",
item.path,
e,
)
ui.print_(
f"Migration complete: {migrated_count} of {total_items} items "
f"updated with comma-separated genres"
)

View file

@ -33,6 +33,7 @@ import beets
import beets.ui
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.metadata_plugins import MetadataSourcePlugin
from beets.util import unique_list
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Sequence
@ -235,8 +236,7 @@ class BeatportObject:
self.artists = [(x["id"], str(x["name"])) for x in data["artists"]]
if "genres" in data:
genre_list = [str(x["name"]) for x in data["genres"]]
# Remove duplicates while preserving order
self.genres = list(dict.fromkeys(genre_list))
self.genres = unique_list(genre_list)
def artists_str(self) -> str | None:
if self.artists is not None:
@ -255,7 +255,6 @@ class BeatportRelease(BeatportObject):
label_name: str | None
category: str | None
url: str | None
genre: str | None
tracks: list[BeatportTrack] | None = None
@ -265,7 +264,6 @@ class BeatportRelease(BeatportObject):
self.catalog_number = data.get("catalogNumber")
self.label_name = data.get("label", {}).get("name")
self.category = data.get("category")
self.genre = data.get("genre")
if "slug" in data:
self.url = (
@ -287,7 +285,6 @@ class BeatportTrack(BeatportObject):
track_number: int | None
bpm: str | None
initial_key: str | None
genre: str | None
def __init__(self, data: JSONDict):
super().__init__(data)
@ -316,8 +313,7 @@ class BeatportTrack(BeatportObject):
else:
genre_list = []
# Remove duplicates while preserving order
self.genres = list(dict.fromkeys(genre_list))
self.genres = unique_list(genre_list)
class BeatportPlugin(MetadataSourcePlugin):
@ -490,7 +486,6 @@ class BeatportPlugin(MetadataSourcePlugin):
media="Digital",
data_source=self.data_source,
data_url=release.url,
genre=release.genre,
genres=release.genres,
year=release_date.year if release_date else None,
month=release_date.month if release_date else None,
@ -516,7 +511,6 @@ class BeatportPlugin(MetadataSourcePlugin):
data_url=track.url,
bpm=track.bpm,
initial_key=track.initial_key,
genre=track.genre,
genres=track.genres,
)

View file

@ -38,6 +38,8 @@ from beets.library import Album, Item
from beets.util import plurality, unique_list
if TYPE_CHECKING:
from collections.abc import Iterable
from beets.library import LibModel
LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY)
@ -206,7 +208,7 @@ class LastGenrePlugin(plugins.BeetsPlugin):
- Returns an empty list if the input tags list is empty.
- If canonicalization is enabled, it extends the list by incorporating
parent genres from the canonicalization tree. When a whitelist is set,
only parent tags that pass a validity check (_is_valid) are included;
only parent tags that pass the whitelist filter are included;
otherwise, it adds the oldest ancestor. Adding parent tags is stopped
when the count of tags reaches the configured limit (count).
- The tags list is then deduplicated to ensure only unique genres are
@ -230,11 +232,9 @@ class LastGenrePlugin(plugins.BeetsPlugin):
# Add parents that are in the whitelist, or add the oldest
# ancestor if no whitelist
if self.whitelist:
parents = [
x
for x in find_parents(tag, self.c14n_branches)
if self._is_valid(x)
]
parents = self._filter_valid(
find_parents(tag, self.c14n_branches)
)
else:
parents = [find_parents(tag, self.c14n_branches)[-1]]
@ -256,7 +256,7 @@ class LastGenrePlugin(plugins.BeetsPlugin):
# c14n only adds allowed genres but we may have had forbidden genres in
# the original tags list
valid_tags = [t for t in tags if self._is_valid(t)]
valid_tags = self._filter_valid(tags)
return valid_tags[:count]
def fetch_genre(self, lastfm_obj):
@ -266,15 +266,16 @@ class LastGenrePlugin(plugins.BeetsPlugin):
min_weight = self.config["min_weight"].get(int)
return self._tags_for(lastfm_obj, min_weight)
def _is_valid(self, genre: str) -> bool:
"""Check if the genre is valid.
def _filter_valid(self, genres: Iterable[str]) -> list[str]:
"""Filter genres based on whitelist.
Depending on the whitelist property, valid means a genre is in the
whitelist or any genre is allowed.
Returns all genres if no whitelist is configured, otherwise returns
only genres that are in the whitelist.
"""
if genre and (not self.whitelist or genre.lower() in self.whitelist):
return True
return False
if not self.whitelist:
return list(genres)
return [g for g in genres if g.lower() in self.whitelist]
# Cached last.fm entity lookups.
@ -456,7 +457,6 @@ class LastGenrePlugin(plugins.BeetsPlugin):
):
return valid_genres, "original fallback"
# Return fallback as a list.
if fallback := self.config["fallback"].get():
return [fallback], "fallback"
@ -472,7 +472,6 @@ class LastGenrePlugin(plugins.BeetsPlugin):
obj.genres, label = self._get_genre(obj)
self._log.debug("Resolved ({}): {}", label, obj.genres)
ui.show_model_changes(obj, fields=["genres"], print_obj=False)
@singledispatchmethod

View file

@ -21,12 +21,11 @@ New features:
updated to populate the ``genres`` field as a list.
**Migration**: Existing libraries with comma-separated, semicolon-separated,
or slash-separated genre strings (e.g., ``"Rock, Alternative, Indie"``) will
be automatically migrated to the ``genres`` list when items are accessed. No
manual reimport or ``mbsync`` is required. For users who prefer explicit
control, a new ``beet migrate genres`` command is available to migrate the
entire library at once. Use ``beet migrate genres --pretend`` to preview
changes before applying them.
or slash-separated genre strings (e.g., ``"Rock, Alternative, Indie"``) are
automatically migrated to the ``genres`` list when you first run beets after
upgrading. The migration runs once when the database schema is updated,
splitting genre strings and writing the changes to both the database and media
files. No manual action or ``mbsync`` is required.
- :doc:`plugins/ftintitle`: Added argument for custom feat. words in ftintitle.
- :doc:`plugins/ftintitle`: Added album template value ``album_artist_no_feat``.
@ -111,6 +110,10 @@ Other changes:
unavailable, enabling ``importorskip`` usage in pytest setup.
- Finally removed gmusic plugin and all related code/docs as the Google Play
Music service was shut down in 2020.
- :doc:`plugins/lastgenre`: The ``separator`` configuration option is
deprecated. Genres are now stored as a list in the ``genres`` field and
written to files as individual genre tags. The separator option has no effect
and will be removed in a future version.
2.5.1 (October 14, 2025)
------------------------

View file

@ -90,9 +90,8 @@ By default, the plugin chooses the most popular tag on Last.fm as a genre. If
you prefer to use a *list* of popular genre tags, you can increase the number of
the ``count`` config option.
Lists of up to *count* genres will then be used instead of single genres. The
genres are separated by commas by default, but you can change this with the
``separator`` config option.
Lists of up to *count* genres will be stored in the ``genres`` field as a list
and written to your media files as separate genre tags.
Last.fm_ provides a popularity factor, a.k.a. *weight*, for each tag ranging
from 100 for the most popular tag down to 0 for the least popular. The plugin
@ -192,7 +191,16 @@ file. The available options are:
Default: ``no``.
- **source**: Which entity to look up in Last.fm. Can be either ``artist``,
``album`` or ``track``. Default: ``album``.
- **separator**: A separator for multiple genres. Default: ``', '``.
- **separator**:
.. deprecated:: 2.6
The ``separator`` option is deprecated. Genres are now stored as a list in
the ``genres`` field and written to files as individual genre tags. This
option has no effect and will be removed in a future version.
Default: ``', '``.
- **whitelist**: The filename of a custom genre list, ``yes`` to use the
internal whitelist, or ``no`` to consider all genres valid. Default: ``yes``.
- **title_case**: Convert the new tags to TitleCase before saving. Default:

View file

@ -352,7 +352,7 @@ class DGAlbumInfoTest(BeetsTestCase):
release = self._make_release_from_positions(["1", "2"])
d = DiscogsPlugin().get_album_info(release)
assert d.genre == "GENRE1, GENRE2"
assert d.genres == ["GENRE1", "GENRE2"]
assert d.style == "STYLE1, STYLE2"
def test_append_style_to_genre(self):
@ -361,7 +361,7 @@ class DGAlbumInfoTest(BeetsTestCase):
release = self._make_release_from_positions(["1", "2"])
d = DiscogsPlugin().get_album_info(release)
assert d.genre == "GENRE1, GENRE2, STYLE1, STYLE2"
assert d.genres == ["GENRE1", "GENRE2", "STYLE1", "STYLE2"]
assert d.style == "STYLE1, STYLE2"
def test_append_style_to_genre_no_style(self):
@ -371,7 +371,7 @@ class DGAlbumInfoTest(BeetsTestCase):
release.data["styles"] = []
d = DiscogsPlugin().get_album_info(release)
assert d.genre == "GENRE1, GENRE2"
assert d.genres == ["GENRE1", "GENRE2"]
assert d.style is None
def test_strip_disambiguation(self):

View file

@ -401,25 +401,7 @@ class LastGenrePluginTest(PluginTestCase):
},
(["fallback genre"], "fallback"),
),
# 9 - null charachter as separator
(
{
"force": True,
"keep_existing": True,
"source": "album",
"whitelist": True,
"separator": "\u0000",
"canonical": False,
"prefer_specific": False,
"count": 10,
},
"Blues",
{
"album": ["Jazz"],
},
(["Blues", "Jazz"], "keep + album, whitelist"),
),
# 10 - limit a lot of results
# 9 - limit a lot of results
(
{
"force": True,
@ -429,7 +411,6 @@ class LastGenrePluginTest(PluginTestCase):
"count": 5,
"canonical": False,
"prefer_specific": False,
"separator": ", ",
},
"original unknown, Blues, Rock, Folk, Metal",
{
@ -440,23 +421,7 @@ class LastGenrePluginTest(PluginTestCase):
"keep + album, whitelist",
),
),
# 11 - force off does not rely on configured separator
(
{
"force": False,
"keep_existing": False,
"source": "album",
"whitelist": True,
"count": 2,
"separator": ", ",
},
"not ; configured | separator",
{
"album": ["Jazz", "Bebop"],
},
(["not ; configured | separator"], "keep any, no-force"),
),
# 12 - fallback to next stage (artist) if no allowed original present
# 10 - fallback to next stage (artist) if no allowed original present
# and no album genre were fetched.
(
{
@ -476,7 +441,7 @@ class LastGenrePluginTest(PluginTestCase):
},
(["Jazz"], "keep + artist, whitelist"),
),
# 13 - canonicalization transforms non-whitelisted genres to canonical forms
# 11 - canonicalization transforms non-whitelisted genres to canonical forms
#
# "Acid Techno" is not in the default whitelist, thus gets resolved "up" in the
# tree to "Techno" and "Electronic".
@ -496,7 +461,7 @@ class LastGenrePluginTest(PluginTestCase):
},
(["Techno", "Electronic"], "album, whitelist"),
),
# 14 - canonicalization transforms whitelisted genres to canonical forms and
# 12 - canonicalization transforms whitelisted genres to canonical forms and
# includes originals
#
# "Detroit Techno" is in the default whitelist, thus it stays and and also gets
@ -528,7 +493,7 @@ class LastGenrePluginTest(PluginTestCase):
"keep + album, whitelist",
),
),
# 15 - canonicalization transforms non-whitelisted original genres to canonical
# 13 - canonicalization transforms non-whitelisted original genres to canonical
# forms and deduplication works.
#
# "Cosmic Disco" is not in the default whitelist, thus gets resolved "up" in the
@ -581,25 +546,11 @@ def test_get_genre(config_values, item_genre, mock_genres, expected_result):
plugin.setup() # Loads default whitelist and canonicalization tree
item = _common.item()
# Set genres as a list - if item_genre is a string, convert it to list
if item_genre:
# For compatibility with old separator-based tests, split if needed
if (
"separator" in config_values
and config_values["separator"] in item_genre
):
sep = config_values["separator"]
item.genres = [
g.strip() for g in item_genre.split(sep) if g.strip()
]
if ", " in item_genre:
item.genres = [g.strip() for g in item_genre.split(", ")]
else:
# Assume comma-separated if no specific separator
if ", " in item_genre:
item.genres = [
g.strip() for g in item_genre.split(", ") if g.strip()
]
else:
item.genres = [item_genre]
item.genres = [item_genre]
else:
item.genres = []

View file

@ -543,48 +543,3 @@ class TestGenreSync:
assert item.genres == []
assert item.genre == ""
def test_migrate_comma_separated_genres(self):
"""Migrate legacy comma-separated genre strings."""
item = Item(genre="Rock, Alternative, Indie", genres=[])
correct_list_fields(item)
# Should split into genres list
assert item.genres == ["Rock", "Alternative", "Indie"]
# Genre becomes first item after migration
assert item.genre == "Rock"
def test_migrate_semicolon_separated_genres(self):
"""Migrate legacy semicolon-separated genre strings."""
item = Item(genre="Rock; Alternative; Indie", genres=[])
correct_list_fields(item)
assert item.genres == ["Rock", "Alternative", "Indie"]
assert item.genre == "Rock"
def test_migrate_slash_separated_genres(self):
"""Migrate legacy slash-separated genre strings."""
item = Item(genre="Rock / Alternative / Indie", genres=[])
correct_list_fields(item)
assert item.genres == ["Rock", "Alternative", "Indie"]
assert item.genre == "Rock"
def test_no_migration_when_genres_exists(self):
"""Don't migrate if genres list already populated."""
item = Item(genre="Jazz, Blues", genres=["Rock", "Pop"])
correct_list_fields(item)
# Existing genres list should be preserved
# The full genre string is prepended (migration doesn't run when genres exists)
assert item.genres == ["Jazz, Blues", "Rock", "Pop"]
assert item.genre == "Jazz, Blues"
def test_no_migration_single_genre(self):
"""Don't split single genres without separators."""
item = Item(genre="Rock", genres=[])
correct_list_fields(item)
# Single genre (no separator) should not trigger migration
assert item.genres == ["Rock"]
assert item.genre == "Rock"