diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 6f657fb48..e5d1f837e 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -34,6 +34,7 @@ from beets.util import plurality, unique_list from .client import LastFmClient from .loaders import DataFileLoader +from .processing import GenreProcessor from .utils import make_tunelog if TYPE_CHECKING: @@ -63,6 +64,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): "prefer_specific": False, "title_case": True, "pretend": False, + "blacklist": False, } ) self.setup() @@ -73,17 +75,18 @@ class LastGenrePlugin(plugins.BeetsPlugin): self.import_stages = [self.imported] self._tunelog = make_tunelog(self._log) - self.client = LastFmClient( - self._log, self.config["min_weight"].get(int) - ) loader = DataFileLoader.from_config( self.config, self._log, Path(__file__).parent ) - self.whitelist = loader.whitelist + self.processor = GenreProcessor(loader.whitelist, loader.blacklist) self.c14n_branches = loader.c14n_branches self.canonicalize = loader.canonicalize + self.client = LastFmClient( + self._log, self.config["min_weight"].get(int), self.processor + ) + @property def sources(self) -> tuple[str, ...]: """A tuple of allowed genre sources. May contain 'track', @@ -128,7 +131,9 @@ class LastGenrePlugin(plugins.BeetsPlugin): continue return [candidate] - def _resolve_genres(self, tags: list[str]) -> list[str]: + def _resolve_genres( + self, tags: list[str], artist: str | None = None + ) -> list[str]: """Canonicalize, sort and filter a list of genres. - Returns an empty list if the input tags list is empty. 
@@ -157,11 +162,11 @@ class LastGenrePlugin(plugins.BeetsPlugin): for tag in tags: # Add parents that are in the whitelist, or add the oldest # ancestor if no whitelist - if self.whitelist: + if self.processor.whitelist: parents = [ x for x in self.find_parents(tag, self.c14n_branches) - if self._is_valid(x) + if self.processor.is_valid(x) ] else: parents = [self.find_parents(tag, self.c14n_branches)[-1]] @@ -184,19 +189,14 @@ class LastGenrePlugin(plugins.BeetsPlugin): # c14n only adds allowed genres but we may have had forbidden genres in # the original tags list - valid_tags = [t for t in tags if self._is_valid(t)] + valid_tags = [ + t + for t in tags + if self.processor.is_valid(t) + and not self.processor.is_forbidden(t, artist=artist) + ] return valid_tags[:count] - def _is_valid(self, genre: str) -> bool: - """Check if the genre is valid. - - Depending on the whitelist property, valid means a genre is in the - whitelist or any genre is allowed. - """ - if genre and (not self.whitelist or genre.lower() in self.whitelist): - return True - return False - # Main processing: _get_genre() and helpers. def _format_and_stringify(self, tags: list[str]) -> str: @@ -221,13 +221,13 @@ class LastGenrePlugin(plugins.BeetsPlugin): return [g for g in item_genre if g] def _combine_resolve_and_log( - self, old: list[str], new: list[str] + self, old: list[str], new: list[str], artist: str | None = None ) -> list[str]: """Combine old and new genres and process via _resolve_genres.""" self._log.debug("raw last.fm tags: {}", new) self._log.debug("existing genres taken into account: {}", old) combined = old + new - return self._resolve_genres(combined) + return self._resolve_genres(combined, artist=artist) def _get_genre(self, obj: LibModel) -> tuple[str | None, ...]: """Get the final genre string for an Album or Item object. 
def __init__(self, log: Logger, min_weight: int, processor: GenreProcessor):
    """Set up a Last.fm client.

    Args:
        log: Logger kept on the client; a tuning logger is also derived
            from it via ``make_tunelog``.
        min_weight: Minimum weight used to filter tags.
        processor: Genre processor that handles genre validation and
            filtering.
    """
    # Collaborators handed in by the caller.
    self.processor = processor
    self._min_weight = min_weight
    self._log = log
    # State derived or owned by the client itself; the cache starts empty.
    self._tunelog = make_tunelog(log)
    self._genre_cache: GenreCache = {}
""" self._log = log + self._tunelog = make_tunelog(log) self._plugin_dir = plugin_dir self.whitelist = whitelist self.c14n_branches = c14n_branches self.canonicalize = canonicalize + self.blacklist = blacklist @classmethod def from_config( @@ -80,7 +89,12 @@ class DataFileLoader: config["prefer_specific"].get(bool), ) - return cls(log, plugin_dir, whitelist, c14n_branches, canonicalize) + # Load blacklist + blacklist = cls._load_blacklist(log, config["blacklist"].get()) + + return cls( + log, plugin_dir, whitelist, c14n_branches, canonicalize, blacklist + ) @staticmethod def _load_whitelist( @@ -164,3 +178,89 @@ class DataFileLoader: DataFileLoader.flatten_tree(sub, path, branches) else: branches.append([*path, str(elem)]) + + @staticmethod + def _load_blacklist( + log: Logger, config_value: str | bool | None + ) -> Blacklist: + """Load the blacklist from a configured file path. + + For maximum compatibility with regex patterns, a custom format is used: + - Each section starts with an artist name, followed by a colon. + - Subsequent lines are indented (at least one space, typically 4 spaces) and + contain a regex pattern to match a genre. + - A '*' key for artist can be used to specify global forbidden genres. + + Returns a compiled blacklist dictionary mapping artist names to lists of + case-insensitive regex patterns. + + Example blacklist file format: + Artist Name: + .*rock.* + .*metal.* + Another Artist Name: + ^jazz$ + *: + spoken word + comedy + + Raises: + UserError: if the file format is invalid. 
+ """ + blacklist_raw: RawBlacklist = defaultdict(list) + bl_filename = config_value + if not bl_filename or not isinstance(bl_filename, str): + return {} + + tunelog = make_tunelog(log) + log.debug("Loading blacklist file {0}", bl_filename) + section = None + with Path(bl_filename).expanduser().open(encoding="utf-8") as f: + for lineno, line in enumerate(f, 1): + line = line.lower() + if not line.strip() or line.lstrip().startswith("#"): + continue + if not line.startswith(" "): + # Section header + if not line.rstrip().endswith(":"): + raise UserError( + f"Malformed blacklist section header " + f"at line {lineno}: {line}" + ) + section = line.rstrip(":\r\n") + else: + # Pattern line: must be indented (at least one space) + if section is None: + raise UserError( + f"Blacklist regex pattern line before any section header " + f"at line {lineno}: {line}" + ) + blacklist_raw[section].append(line.strip()) + tunelog("Blacklist: {}", blacklist_raw) + + # Compile regex patterns + return DataFileLoader._compile_blacklist_patterns(blacklist_raw) + + @staticmethod + def _compile_blacklist_patterns( + blacklist: RawBlacklist, + ) -> Blacklist: + """Compile blacklist patterns into regex objects. + + Tries regex compilation first, falls back to literal string matching. That way + users can use regexes for flexible matching but also simple strings without + worrying about regex syntax. All patterns are case-insensitive. 
class GenreProcessor:
    """Validate genres against a whitelist and a blacklist.

    - Whitelist validation: a genre is valid when it is in the allowed set,
      or when no whitelist is set at all.
    - Blacklist filtering: a genre is forbidden when it matches a global
      (``"*"``) pattern or a pattern registered for the given artist.
    """

    def __init__(
        self,
        whitelist: Whitelist,
        blacklist: Blacklist,
    ):
        self.blacklist = blacklist
        self.whitelist = whitelist

    def is_valid(self, genre: str) -> bool:
        """Return True for a non-empty genre that passes the whitelist.

        With an empty whitelist every non-empty genre passes; otherwise the
        lowercased genre has to be a member of the whitelist.
        """
        return bool(genre) and (
            not self.whitelist or genre.lower() in self.whitelist
        )

    def is_forbidden(self, genre: str, artist: str | None = None) -> bool:
        """Return True if the genre matches a blacklist pattern.

        The global ``"*"`` patterns are tried first, then (only when an
        artist is given) the patterns stored under the lowercased artist
        name. The genre is lowercased before it is searched.
        """
        if not self.blacklist:
            return False

        needle = genre.lower()

        def matches_section(key: str) -> bool:
            # Membership is tested first so a missing section is never
            # looked up by subscript.
            return key in self.blacklist and any(
                pattern.search(needle) for pattern in self.blacklist[key]
            )

        if matches_section("*"):
            return True
        return bool(artist) and matches_section(artist.lower())
Intermediate format before compilation.""" + +Blacklist = dict[str, list[re.Pattern[str]]] +"""Artist name -> compiled regex patterns for forbidden genres. +Key "*" for global patterns applying to all artists.""" diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index ace7caaf0..9d18d469c 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -161,6 +161,37 @@ genres remain, set ``whitelist: no``). If ``force`` is disabled the ``keep_existing`` option is simply ignored (since ``force: no`` means ``not touching`` existing tags anyway). +Blacklisting Genres +------------------- + +If you want to blacklist certain genres, they could simply be removed from the +``whitelist``. This way, they will never be fetched from Last.fm. To only forbid +them for a specific artist, the ``blacklist`` can be used. A possible +``blacklist`` file would look like this: + +.. code-block:: text + + fracture: + ^(heavy|black|power|death)?\s?(metal|rock)$|\w+-metal\d*$ + progressive metal + gilles peterson: + samba + bossa nova + *: + electronic + +A combination of regex patterns and plain genre names is possible. Patterns +under the ``*`` key are applied to every artist, in addition to any +artist-specific section, which is effectively the same as removing the matching +genres from the whitelist (helpful if the default whitelist is desired to be +kept). + +Set the ``blacklist`` option to the path of a blacklist file to enable this +feature. + +.. attention:: + + Do not use single or double quotes around the genre names or regex patterns. + Configuration ------------- @@ -197,6 +228,9 @@ file. The available options are: internal whitelist, or ``no`` to consider all genres valid. Default: ``yes``. - **title_case**: Convert the new tags to TitleCase before saving. Default: ``yes``. +- **blacklist**: The path to a blacklist file that contains genres to exclude + from being set as genres for specific artists. See `Blacklisting Genres`_ for + more details. Default: ``no``. 
Running Manually ---------------- diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index 8d092f318..7ec118e4f 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -14,12 +14,17 @@ """Tests for the 'lastgenre' plugin.""" +import os +import re +import tempfile +from collections import defaultdict from unittest.mock import Mock, patch import pytest from beets.test import _common from beets.test.helper import PluginTestCase +from beets.ui import UserError from beetsplug import lastgenre @@ -42,7 +47,7 @@ class LastGenrePluginTest(PluginTestCase): self.plugin.setup() if not isinstance(whitelist, (bool, (str,))): # Explicit list of genres. - self.plugin.whitelist = whitelist + self.plugin.processor.whitelist = whitelist def test_default(self): """Fetch genres with whitelist and c14n deactivated""" @@ -598,3 +603,161 @@ def test_get_genre(config_values, item_genre, mock_genres, expected_result): # Run res = plugin._get_genre(item) assert res == expected_result + + +@pytest.mark.parametrize( + "blacklist_dict, artist, genre, expected_forbidden", + [ + # Global blacklist - simple word + ({"*": ["spoken word"]}, "Any Artist", "spoken word", True), + ({"*": ["spoken word"]}, "Any Artist", "jazz", False), + # Global blacklist - regex pattern + ({"*": [".*electronic.*"]}, "Any Artist", "ambient electronic", True), + ({"*": [".*electronic.*"]}, "Any Artist", "jazz", False), + # Artist-specific blacklist + ({"metallica": ["metal"]}, "Metallica", "metal", True), + ({"metallica": ["metal"]}, "Iron Maiden", "metal", False), + # Case insensitive matching + ({"metallica": ["metal"]}, "METALLICA", "METAL", True), + # Artist-specific blacklist - exact match + ({"metallica": ["^Heavy Metal$"]}, "Metallica", "classic metal", False), + # Combined global and artist-specific + ( + {"*": ["spoken word"], "metallica": ["metal"]}, + "Metallica", + "spoken word", + True, + ), + ( + {"*": ["spoken word"], "metallica": ["metal"]}, + 
"Metallica", + "metal", + True, + ), + # Complex regex pattern with multiple features (raw string) + ( + { + "fracture": [ + r"^(heavy|black|power|death)?\s?(metal|rock)$|\w+-metal\d*$" + ] + }, + "Fracture", + "power metal", + True, + ), + # Complex regex pattern with multiple features (regular string) + ( + {"amon tobin": ["d(rum)?[ n/]*b(ass)?"]}, + "Amon Tobin", + "dnb", + True, + ), + # Empty blacklist + ({}, "Any Artist", "any genre", False), + ], +) +def test_blacklist_patterns(blacklist_dict, artist, genre, expected_forbidden): + """Test blacklist pattern matching logic directly.""" + + # Initialize plugin + plugin = lastgenre.LastGenrePlugin() + + # Set up compiled blacklist directly (skipping file parsing) + compiled_blacklist = defaultdict(list) + for artist_name, patterns in blacklist_dict.items(): + compiled_blacklist[artist_name.lower()] = [ + re.compile(pattern) for pattern in patterns + ] + + plugin.processor.blacklist = compiled_blacklist + + # Test the is_forbidden method on processor + result = plugin.processor.is_forbidden(genre, artist) + assert result == expected_forbidden + + +@pytest.mark.parametrize( + "file_content, expected_blacklist", + [ + # Basic artist with pattern + ("metallica:\n metal", {"metallica": ["metal"]}), + # Global blacklist + ("*:\n spoken word", {"*": ["spoken word"]}), + # Multiple patterns per artist + ( + "metallica:\n metal\n .*rock.*", + {"metallica": ["metal", ".*rock.*"]}, + ), + # Comments and empty lines ignored + ( + "# comment\n*:\n spoken word\n\nmetallica:\n metal", + {"*": ["spoken word"], "metallica": ["metal"]}, + ), + # Case insensitive artist names + ("METALLICA:\n METAL", {"metallica": ["metal"]}), + # Invalid regex pattern that gets escaped + ("artist:\n [invalid(regex", {"artist": ["\\[invalid\\(regex"]}), + # Empty file + ("", {}), + ], +) +def test_blacklist_file_format(file_content, expected_blacklist): + """Test blacklist file format parsing.""" + + with tempfile.NamedTemporaryFile( + mode="w", 
@pytest.mark.parametrize(
    "invalid_content, expected_error_message",
    [
        # Missing colon
        ("metallica\n metal", "Malformed blacklist section header"),
        # Pattern before section
        (" metal\nmetallica:\n heavy metal", "before any section header"),
        # Unindented pattern
        ("metallica:\nmetal", "Malformed blacklist section header"),
    ],
)
def test_blacklist_file_format_errors(invalid_content, expected_error_message):
    """Malformed blacklist files are rejected with a ``UserError``."""
    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    )
    # Leaving the ``with`` block closes the file so the loader can read it;
    # ``delete=False`` keeps it on disk until the explicit unlink below.
    with handle:
        handle.write(invalid_content)
    path = handle.name

    try:
        # _load_blacklist is a static method, so no plugin instance is needed.
        with pytest.raises(UserError) as raised:
            lastgenre.DataFileLoader._load_blacklist(Mock(), path)

        assert expected_error_message in str(raised.value)
    finally:
        os.unlink(path)