diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 8d9590948..0bfe5e170 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -58,7 +58,7 @@ if TYPE_CHECKING: Example: [['electronic', 'house'], ['electronic', 'techno']]""" RawIgnorelist = dict[str, list[str]] - """Mapping of artist name to list of raw regex/string patterns.""" + """Mapping of artist name (lowercased) to list of raw regex/string patterns.""" # Canonicalization tree processing. @@ -199,75 +199,54 @@ class LastGenrePlugin(plugins.BeetsPlugin): return c14n_branches, canonicalize def _load_ignorelist(self) -> Ignorelist: - """Load the genre ignorelist from a configured file path. + r"""Load the genre ignorelist from the beets config. - For maximum compatibility with regex patterns, a custom format is used: - - Each section starts with an artist name, followed by a colon. - - Subsequent lines are indented (at least one space, typically 4 spaces) - and contain a regex pattern to match a genre or a literal genre name. - - A '*' key for artist can be used to specify global forbidden genres. + Reads ``lastgenre.ignorelist`` as a mapping of artist names to lists + of regex / literal genre patterns. Use the quoted ``'*'`` key to + define globally ignored genres:: - Returns a compiled ignorelist dictionary mapping artist names to lists of - case-insensitive regex patterns. Returns empty dict if not configured. + lastgenre: + ignorelist: + '*': + - spoken word + - comedy + Artist Name: + - .*rock.* + - .*metal.* - Example ignorelist file format:: + Artist names are matched case-insensitively. Each pattern is tried as + a regex full-match first; if it is not valid regex it is treated as a + literal string. All patterns are case-insensitive. - Artist Name: - .*rock.* - .*metal.* - Another Artist Name: - ^jazz$ - *: - spoken word - comedy + **Note:** Because patterns are parsed as plain YAML scalars, + backslashes (e.g. ``\w``) should **not** be double-escaped, and + single/double quotes should be avoided. + + Returns an empty dict if ``ignorelist`` is not configured (``no`` / + ``false``). Raises: - UserError: if the ignorelist file cannot be read or if the file - format is invalid. + UserError: if the config value is not a mapping, or if an entry's + value is not a list of strings. """ - ignorelist_raw: RawIgnorelist = defaultdict(list) - ignorelist_file = self.config["ignorelist"].get() - if not ignorelist_file: + ignorelist_config = self.config["ignorelist"].get() + if not ignorelist_config: return {} - if not isinstance(ignorelist_file, str): + if not isinstance(ignorelist_config, dict): raise UserError( - "Invalid value for lastgenre.ignorelist: expected path string " - f"or 'no', got {ignorelist_file!r}" + "Invalid value for lastgenre.ignorelist: expected a mapping " + f"(artist \u2192 list of patterns), got {ignorelist_config!r}" ) - self._log.debug("Loading ignorelist file {}", ignorelist_file) - section = None - try: - with Path(ignorelist_file).expanduser().open(encoding="utf-8") as f: - for lineno, line in enumerate(f, 1): - if not line.strip() or line.lstrip().startswith("#"): - continue - if not line.startswith(" "): - # Section header - header = line.strip().lower() - if not header.endswith(":"): - raise UserError( - f"Malformed ignorelist section header " - f"at line {lineno}" - ) - section = header[:-1].rstrip() - if not section: - raise UserError( - f"Empty ignorelist section name " - f"at line {lineno}" - ) - else: - # Pattern line: must be indented (at least one space) - if section is None: - raise UserError( - f"Ignorelist pattern line before any section header " - f"at line {lineno}" - ) - ignorelist_raw[section].append(line.strip()) - except OSError as exc: - raise UserError( - f"Cannot read ignorelist file {ignorelist_file!r}: {exc}" - ) from exc + ignorelist_raw: RawIgnorelist = {} + for artist, patterns in ignorelist_config.items(): + if not isinstance(patterns, list): + raise UserError( + f"Invalid lastgenre.ignorelist entry for {artist!r}: " + f"expected a list of patterns, got {patterns!r}" + ) + ignorelist_raw[str(artist).lower()] = [str(p) for p in patterns] + self._log.extra_debug("Ignorelist: {}", ignorelist_raw) return self._compile_ignorelist_patterns(ignorelist_raw) @@ -561,6 +540,9 @@ class LastGenrePlugin(plugins.BeetsPlugin): ) if new_genres: stage_label = "multi-valued album artist" + stage_artist = ( + None # Already filtered per-artist in client + ) else: # For "Various Artists", pick the most popular track genre. item_genres = [] diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index bb993b295..a3e3d7f31 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -14,14 +14,13 @@ """Tests for the 'lastgenre' plugin.""" -import os import re -import tempfile from collections import defaultdict -from unittest.mock import Mock, patch +from unittest.mock import MagicMock, Mock, patch import pytest +from beets.library import Album from beets.test import _common from beets.test.helper import IOMixin, PluginTestCase from beets.ui import UserError @@ -759,7 +758,7 @@ def test_ignorelist_patterns( ): """Test ignorelist pattern matching logic directly.""" - # Disable file-based ignorelist to avoid depending on global config state. + # Disable ignorelist to avoid depending on global config state. config["lastgenre"]["ignorelist"] = False # Initialize plugin @@ -783,7 +782,7 @@ def test_ignorelist_literal_fallback_uses_fullmatch(config): full-match semantics: the pattern must equal the entire genre string, not just appear as a substring. """ - # Disable file-based ignorelist to avoid depending on global config state. + # Disable ignorelist to avoid depending on global config state. config["lastgenre"]["ignorelist"] = False plugin = lastgenre.LastGenrePlugin() # "[not valid regex" is not valid regex, so _compile_ignorelist_patterns @@ -809,85 +808,135 @@ def test_ignorelist_literal_fallback_uses_fullmatch(config): @pytest.mark.parametrize( - "file_content, expected_ignorelist", + "ignorelist_config, expected_ignorelist", [ - # Basic artist with pattern - ("metallica:\n metal", {"metallica": ["metal"]}), - # Global ignorelist - ("*:\n spoken word", {"*": ["spoken word"]}), + # Basic artist with single pattern + ({"metallica": ["metal"]}, {"metallica": ["metal"]}), + # Global ignorelist with '*' key + ({"*": ["spoken word"]}, {"*": ["spoken word"]}), # Multiple patterns per artist ( - "metallica:\n metal\n .*rock.*", + {"metallica": ["metal", ".*rock.*"]}, {"metallica": ["metal", ".*rock.*"]}, ), - # Comments and empty lines skipped + # Combined global and artist-specific ( - "# comment\n*:\n spoken word\n\nmetallica:\n metal", + {"*": ["spoken word"], "metallica": ["metal"]}, {"*": ["spoken word"], "metallica": ["metal"]}, ), - # Case insensitive artist names — key lowercased, pattern kept as-is + # Artist names are lowercased; patterns are kept as-is # (patterns compiled with re.IGNORECASE so case doesn't matter for matching) - ("METALLICA:\n METAL", {"metallica": ["METAL"]}), - # Invalid regex pattern that gets escaped - ("artist:\n [invalid(regex", {"artist": ["\\[invalid\\(regex"]}), - # Empty file - ("", {}), + ({"METALLICA": ["METAL"]}, {"metallica": ["METAL"]}), + # Invalid regex pattern that gets escaped (full-match literal fallback) + ({"artist": ["[invalid(regex"]}, {"artist": ["\\[invalid\\(regex"]}), + # Disabled via False / empty dict — both produce empty dict + (False, {}), + ({}, {}), ], ) -def test_ignorelist_file_format(config, file_content, expected_ignorelist): - """Test ignorelist file format parsing.""" +def test_ignorelist_config_format( + config, ignorelist_config, expected_ignorelist +): + """Test ignorelist parsing from beets config (dict-based).""" + config["lastgenre"]["ignorelist"] = ignorelist_config + plugin = lastgenre.LastGenrePlugin() - with tempfile.NamedTemporaryFile( - mode="w", suffix=".txt", delete=False, encoding="utf-8" - ) as f: - f.write(file_content) - ignorelist_file = f.name + # Convert compiled regex patterns back to strings for comparison + string_ignorelist = { + artist: [p.pattern for p in patterns] + for artist, patterns in plugin.ignorelist.items() + } - try: - config["lastgenre"]["ignorelist"] = ignorelist_file - plugin = lastgenre.LastGenrePlugin() - - # Convert compiled regex patterns back to strings for comparison - string_ignorelist = { - artist: [p.pattern for p in patterns] - for artist, patterns in plugin.ignorelist.items() - } - - assert string_ignorelist == expected_ignorelist - - finally: - os.unlink(ignorelist_file) + assert string_ignorelist == expected_ignorelist @pytest.mark.parametrize( - "invalid_content, expected_error_message", + "invalid_config, expected_error_message", [ - # Missing colon - ("metallica\n metal", "Malformed ignorelist section header"), - # Pattern before section - (" metal\nmetallica:\n heavy metal", "before any section header"), - # Unindented pattern - ("metallica:\nmetal", "Malformed ignorelist section header"), + # A plain string (e.g. accidental file path) instead of a mapping + ( + "/path/to/ignorelist.txt", + "expected a mapping", + ), + # An integer instead of a mapping + ( + 42, + "expected a mapping", + ), + # A list of strings instead of a mapping + ( + ["spoken word", "comedy"], + "expected a mapping", + ), + # A mapping with a non-list value + ( + {"metallica": "metal"}, + "expected a list of patterns", + ), ], ) -def test_ignorelist_file_format_errors( - config, invalid_content, expected_error_message +def test_ignorelist_config_format_errors( + config, invalid_config, expected_error_message ): - """Test ignorelist file format error handling.""" + """Test ignorelist config validation error handling.""" + config["lastgenre"]["ignorelist"] = invalid_config - with tempfile.NamedTemporaryFile( - mode="w", suffix=".txt", delete=False, encoding="utf-8" - ) as f: - f.write(invalid_content) - ignorelist_file = f.name + with pytest.raises(UserError) as exc_info: + lastgenre.LastGenrePlugin() - try: - config["lastgenre"]["ignorelist"] = ignorelist_file + assert expected_error_message in str(exc_info.value) - with pytest.raises(UserError) as exc_info: - lastgenre.LastGenrePlugin() - assert expected_error_message in str(exc_info.value) +def test_ignorelist_multivalued_album_artist_fallback(config): + """Verify ignorelist filtering for multi-valued album artist fallbacks. - finally: - os.unlink(ignorelist_file) + Genres already filtered for individual artists should not be dropped + due to a secondary (incorrect) check against the combined group artist. + """ + # Setup config: ignore 'Metal' for 'Artist A' and 'Artist Group'. + config["lastgenre"]["ignorelist"] = { + "Artist A": ["Metal"], + "Artist Group": ["Metal"], + } + # No whitelist and larger count to keep it simple. + config["lastgenre"]["whitelist"] = False + config["lastgenre"]["count"] = 10 + + plugin = lastgenre.LastGenrePlugin() + plugin.setup() + + # Mock album object. + obj = MagicMock(spec=Album) + obj.albumartist = "Artist Group" + obj.album = "Album Title" + obj.albumartists = ["Artist A", "Artist B"] + obj.get.return_value = [] # no existing genres + + # Mock client and its artist lookups. + # We must ensure it doesn't resolve at track or album stage. + plugin.client = MagicMock() + plugin.client.fetch_track_genre.return_value = [] + plugin.client.fetch_album_genre.return_value = [] + + # Artist lookup side effect: + # Artist A: Returns 'Metal' and 'Rock'. + # (Note: Client should have filtered 'Metal' already, so we simulate that by + # returning only 'Rock'). + # Artist B: Returns 'Metal' and 'Jazz'. + # Artist Group: Returns nothing (triggers fallback). + def mock_fetch_artist(artist): + if artist == "Artist A": + return ["Rock"] + if artist == "Artist B": + return ["Metal", "Jazz"] + return [] + + plugin.client.fetch_artist_genre.side_effect = mock_fetch_artist + + # Note: manually triggering the logic in _get_genre. + genres, label = plugin._get_genre(obj) + + assert "multi-valued album artist" in label + assert "Rock" in genres + assert "Metal" in genres # MUST survive because Artist B allowed it + assert "Jazz" in genres