lastgenre: Refactor ignorelist to yaml

This commit is contained in:
J0J0 Todos 2026-03-26 06:52:32 +01:00
parent c17eebb16e
commit 7f8fbbe6b6
2 changed files with 152 additions and 121 deletions

View file

@ -58,7 +58,7 @@ if TYPE_CHECKING:
Example: [['electronic', 'house'], ['electronic', 'techno']]"""
RawIgnorelist = dict[str, list[str]]
"""Mapping of artist name to list of raw regex/string patterns."""
"""Mapping of artist name (lowercased) to list of raw regex/string patterns."""
# Canonicalization tree processing.
@ -199,75 +199,54 @@ class LastGenrePlugin(plugins.BeetsPlugin):
return c14n_branches, canonicalize
def _load_ignorelist(self) -> Ignorelist:
"""Load the genre ignorelist from a configured file path.
r"""Load the genre ignorelist from the beets config.
For maximum compatibility with regex patterns, a custom format is used:
- Each section starts with an artist name, followed by a colon.
- Subsequent lines are indented (at least one space, typically 4 spaces)
and contain a regex pattern to match a genre or a literal genre name.
- A '*' key for artist can be used to specify global forbidden genres.
Reads ``lastgenre.ignorelist`` as a mapping of artist names to lists
of regex / literal genre patterns. Use the quoted ``'*'`` key to
define globally ignored genres::
Returns a compiled ignorelist dictionary mapping artist names to lists of
case-insensitive regex patterns. Returns empty dict if not configured.
lastgenre:
ignorelist:
'*':
- spoken word
- comedy
Artist Name:
- .*rock.*
- .*metal.*
Example ignorelist file format::
Artist names are matched case-insensitively. Each pattern is tried as
a regex full-match first; if it is not valid regex it is treated as a
literal string. All patterns are case-insensitive.
Artist Name:
.*rock.*
.*metal.*
Another Artist Name:
^jazz$
*:
spoken word
comedy
**Note:** Because patterns are parsed as plain YAML scalars,
backslashes (e.g. ``\w``) should **not** be double-escaped, and
single/double quotes should be avoided.
Returns an empty dict if ``ignorelist`` is not configured (``no`` /
``false``).
Raises:
UserError: if the ignorelist file cannot be read or if the file
format is invalid.
UserError: if the config value is not a mapping, or if an entry's
value is not a list of strings.
"""
ignorelist_raw: RawIgnorelist = defaultdict(list)
ignorelist_file = self.config["ignorelist"].get()
if not ignorelist_file:
ignorelist_config = self.config["ignorelist"].get()
if not ignorelist_config:
return {}
if not isinstance(ignorelist_file, str):
if not isinstance(ignorelist_config, dict):
raise UserError(
"Invalid value for lastgenre.ignorelist: expected path string "
f"or 'no', got {ignorelist_file!r}"
"Invalid value for lastgenre.ignorelist: expected a mapping "
f"(artist \u2192 list of patterns), got {ignorelist_config!r}"
)
self._log.debug("Loading ignorelist file {}", ignorelist_file)
section = None
try:
with Path(ignorelist_file).expanduser().open(encoding="utf-8") as f:
for lineno, line in enumerate(f, 1):
if not line.strip() or line.lstrip().startswith("#"):
continue
if not line.startswith(" "):
# Section header
header = line.strip().lower()
if not header.endswith(":"):
raise UserError(
f"Malformed ignorelist section header "
f"at line {lineno}"
)
section = header[:-1].rstrip()
if not section:
raise UserError(
f"Empty ignorelist section name "
f"at line {lineno}"
)
else:
# Pattern line: must be indented (at least one space)
if section is None:
raise UserError(
f"Ignorelist pattern line before any section header "
f"at line {lineno}"
)
ignorelist_raw[section].append(line.strip())
except OSError as exc:
raise UserError(
f"Cannot read ignorelist file {ignorelist_file!r}: {exc}"
) from exc
ignorelist_raw: RawIgnorelist = {}
for artist, patterns in ignorelist_config.items():
if not isinstance(patterns, list):
raise UserError(
f"Invalid lastgenre.ignorelist entry for {artist!r}: "
f"expected a list of patterns, got {patterns!r}"
)
ignorelist_raw[str(artist).lower()] = [str(p) for p in patterns]
self._log.extra_debug("Ignorelist: {}", ignorelist_raw)
return self._compile_ignorelist_patterns(ignorelist_raw)
@ -561,6 +540,9 @@ class LastGenrePlugin(plugins.BeetsPlugin):
)
if new_genres:
stage_label = "multi-valued album artist"
stage_artist = (
None # Already filtered per-artist in client
)
else:
# For "Various Artists", pick the most popular track genre.
item_genres = []

View file

@ -14,14 +14,13 @@
"""Tests for the 'lastgenre' plugin."""
import os
import re
import tempfile
from collections import defaultdict
from unittest.mock import Mock, patch
from unittest.mock import MagicMock, Mock, patch
import pytest
from beets.library import Album
from beets.test import _common
from beets.test.helper import IOMixin, PluginTestCase
from beets.ui import UserError
@ -759,7 +758,7 @@ def test_ignorelist_patterns(
):
"""Test ignorelist pattern matching logic directly."""
# Disable file-based ignorelist to avoid depending on global config state.
# Disable ignorelist to avoid depending on global config state.
config["lastgenre"]["ignorelist"] = False
# Initialize plugin
@ -783,7 +782,7 @@ def test_ignorelist_literal_fallback_uses_fullmatch(config):
full-match semantics: the pattern must equal the entire genre string,
not just appear as a substring.
"""
# Disable file-based ignorelist to avoid depending on global config state.
# Disable ignorelist to avoid depending on global config state.
config["lastgenre"]["ignorelist"] = False
plugin = lastgenre.LastGenrePlugin()
# "[not valid regex" is not valid regex, so _compile_ignorelist_patterns
@ -809,85 +808,135 @@ def test_ignorelist_literal_fallback_uses_fullmatch(config):
@pytest.mark.parametrize(
"file_content, expected_ignorelist",
"ignorelist_config, expected_ignorelist",
[
# Basic artist with pattern
("metallica:\n metal", {"metallica": ["metal"]}),
# Global ignorelist
("*:\n spoken word", {"*": ["spoken word"]}),
# Basic artist with single pattern
({"metallica": ["metal"]}, {"metallica": ["metal"]}),
# Global ignorelist with '*' key
({"*": ["spoken word"]}, {"*": ["spoken word"]}),
# Multiple patterns per artist
(
"metallica:\n metal\n .*rock.*",
{"metallica": ["metal", ".*rock.*"]},
{"metallica": ["metal", ".*rock.*"]},
),
# Comments and empty lines skipped
# Combined global and artist-specific
(
"# comment\n*:\n spoken word\n\nmetallica:\n metal",
{"*": ["spoken word"], "metallica": ["metal"]},
{"*": ["spoken word"], "metallica": ["metal"]},
),
# Case insensitive artist names — key lowercased, pattern kept as-is
# Artist names are lowercased; patterns are kept as-is
# (patterns compiled with re.IGNORECASE so case doesn't matter for matching)
("METALLICA:\n METAL", {"metallica": ["METAL"]}),
# Invalid regex pattern that gets escaped
("artist:\n [invalid(regex", {"artist": ["\\[invalid\\(regex"]}),
# Empty file
("", {}),
({"METALLICA": ["METAL"]}, {"metallica": ["METAL"]}),
# Invalid regex pattern that gets escaped (full-match literal fallback)
({"artist": ["[invalid(regex"]}, {"artist": ["\\[invalid\\(regex"]}),
# Disabled via False / empty dict — both produce empty dict
(False, {}),
({}, {}),
],
)
def test_ignorelist_file_format(config, file_content, expected_ignorelist):
"""Test ignorelist file format parsing."""
def test_ignorelist_config_format(
config, ignorelist_config, expected_ignorelist
):
"""Test ignorelist parsing from beets config (dict-based)."""
config["lastgenre"]["ignorelist"] = ignorelist_config
plugin = lastgenre.LastGenrePlugin()
with tempfile.NamedTemporaryFile(
mode="w", suffix=".txt", delete=False, encoding="utf-8"
) as f:
f.write(file_content)
ignorelist_file = f.name
# Convert compiled regex patterns back to strings for comparison
string_ignorelist = {
artist: [p.pattern for p in patterns]
for artist, patterns in plugin.ignorelist.items()
}
try:
config["lastgenre"]["ignorelist"] = ignorelist_file
plugin = lastgenre.LastGenrePlugin()
# Convert compiled regex patterns back to strings for comparison
string_ignorelist = {
artist: [p.pattern for p in patterns]
for artist, patterns in plugin.ignorelist.items()
}
assert string_ignorelist == expected_ignorelist
finally:
os.unlink(ignorelist_file)
assert string_ignorelist == expected_ignorelist
@pytest.mark.parametrize(
"invalid_content, expected_error_message",
"invalid_config, expected_error_message",
[
# Missing colon
("metallica\n metal", "Malformed ignorelist section header"),
# Pattern before section
(" metal\nmetallica:\n heavy metal", "before any section header"),
# Unindented pattern
("metallica:\nmetal", "Malformed ignorelist section header"),
# A plain string (e.g. accidental file path) instead of a mapping
(
"/path/to/ignorelist.txt",
"expected a mapping",
),
# An integer instead of a mapping
(
42,
"expected a mapping",
),
# A list of strings instead of a mapping
(
["spoken word", "comedy"],
"expected a mapping",
),
# A mapping with a non-list value
(
{"metallica": "metal"},
"expected a list of patterns",
),
],
)
def test_ignorelist_file_format_errors(
config, invalid_content, expected_error_message
def test_ignorelist_config_format_errors(
config, invalid_config, expected_error_message
):
"""Test ignorelist file format error handling."""
"""Test ignorelist config validation error handling."""
config["lastgenre"]["ignorelist"] = invalid_config
with tempfile.NamedTemporaryFile(
mode="w", suffix=".txt", delete=False, encoding="utf-8"
) as f:
f.write(invalid_content)
ignorelist_file = f.name
with pytest.raises(UserError) as exc_info:
lastgenre.LastGenrePlugin()
try:
config["lastgenre"]["ignorelist"] = ignorelist_file
assert expected_error_message in str(exc_info.value)
with pytest.raises(UserError) as exc_info:
lastgenre.LastGenrePlugin()
assert expected_error_message in str(exc_info.value)
def test_ignorelist_multivalued_album_artist_fallback(config):
"""Verify ignorelist filtering for multi-valued album artist fallbacks.
finally:
os.unlink(ignorelist_file)
Genres already filtered for individual artists should not be dropped
due to a secondary (incorrect) check against the combined group artist.
"""
# Setup config: ignore 'Metal' for 'Artist A' and 'Artist Group'.
config["lastgenre"]["ignorelist"] = {
"Artist A": ["Metal"],
"Artist Group": ["Metal"],
}
# No whitelist and larger count to keep it simple.
config["lastgenre"]["whitelist"] = False
config["lastgenre"]["count"] = 10
plugin = lastgenre.LastGenrePlugin()
plugin.setup()
# Mock album object.
obj = MagicMock(spec=Album)
obj.albumartist = "Artist Group"
obj.album = "Album Title"
obj.albumartists = ["Artist A", "Artist B"]
obj.get.return_value = [] # no existing genres
# Mock client and its artist lookups.
# We must ensure it doesn't resolve at track or album stage.
plugin.client = MagicMock()
plugin.client.fetch_track_genre.return_value = []
plugin.client.fetch_album_genre.return_value = []
# Artist lookup side effect:
# Artist A: Returns 'Metal' and 'Rock'.
# (Note: Client should have filtered 'Metal' already, so we simulate that by
# returning only 'Rock').
# Artist B: Returns 'Metal' and 'Jazz'.
# Artist Group: Returns nothing (triggers fallback).
def mock_fetch_artist(artist):
if artist == "Artist A":
return ["Rock"]
if artist == "Artist B":
return ["Metal", "Jazz"]
return []
plugin.client.fetch_artist_genre.side_effect = mock_fetch_artist
# Note: manually triggering the logic in _get_genre.
genres, label = plugin._get_genre(obj)
assert "multi-valued album artist" in label
assert "Rock" in genres
assert "Metal" in genres # MUST survive because Artist B allowed it
assert "Jazz" in genres