mirror of
https://github.com/beetbox/beets.git
synced 2026-03-30 02:02:49 +02:00
lastgenre: Refactor ignorelist to yaml
This commit is contained in:
parent
c17eebb16e
commit
7f8fbbe6b6
2 changed files with 152 additions and 121 deletions
|
|
@ -58,7 +58,7 @@ if TYPE_CHECKING:
|
|||
Example: [['electronic', 'house'], ['electronic', 'techno']]"""
|
||||
|
||||
RawIgnorelist = dict[str, list[str]]
|
||||
"""Mapping of artist name to list of raw regex/string patterns."""
|
||||
"""Mapping of artist name (lowercased) to list of raw regex/string patterns."""
|
||||
|
||||
|
||||
# Canonicalization tree processing.
|
||||
|
|
@ -199,75 +199,54 @@ class LastGenrePlugin(plugins.BeetsPlugin):
|
|||
return c14n_branches, canonicalize
|
||||
|
||||
def _load_ignorelist(self) -> Ignorelist:
|
||||
"""Load the genre ignorelist from a configured file path.
|
||||
r"""Load the genre ignorelist from the beets config.
|
||||
|
||||
For maximum compatibility with regex patterns, a custom format is used:
|
||||
- Each section starts with an artist name, followed by a colon.
|
||||
- Subsequent lines are indented (at least one space, typically 4 spaces)
|
||||
and contain a regex pattern to match a genre or a literal genre name.
|
||||
- A '*' key for artist can be used to specify global forbidden genres.
|
||||
Reads ``lastgenre.ignorelist`` as a mapping of artist names to lists
|
||||
of regex / literal genre patterns. Use the quoted ``'*'`` key to
|
||||
define globally ignored genres::
|
||||
|
||||
Returns a compiled ignorelist dictionary mapping artist names to lists of
|
||||
case-insensitive regex patterns. Returns empty dict if not configured.
|
||||
lastgenre:
|
||||
ignorelist:
|
||||
'*':
|
||||
- spoken word
|
||||
- comedy
|
||||
Artist Name:
|
||||
- .*rock.*
|
||||
- .*metal.*
|
||||
|
||||
Example ignorelist file format::
|
||||
Artist names are matched case-insensitively. Each pattern is tried as
|
||||
a regex full-match first; if it is not valid regex it is treated as a
|
||||
literal string. All patterns are case-insensitive.
|
||||
|
||||
Artist Name:
|
||||
.*rock.*
|
||||
.*metal.*
|
||||
Another Artist Name:
|
||||
^jazz$
|
||||
*:
|
||||
spoken word
|
||||
comedy
|
||||
**Note:** Because patterns are parsed as plain YAML scalars,
|
||||
backslashes (e.g. ``\w``) should **not** be double-escaped, and
|
||||
single/double quotes should be avoided.
|
||||
|
||||
Returns an empty dict if ``ignorelist`` is not configured (``no`` /
|
||||
``false``).
|
||||
|
||||
Raises:
|
||||
UserError: if the ignorelist file cannot be read or if the file
|
||||
format is invalid.
|
||||
UserError: if the config value is not a mapping, or if an entry's
|
||||
value is not a list of strings.
|
||||
"""
|
||||
ignorelist_raw: RawIgnorelist = defaultdict(list)
|
||||
ignorelist_file = self.config["ignorelist"].get()
|
||||
if not ignorelist_file:
|
||||
ignorelist_config = self.config["ignorelist"].get()
|
||||
if not ignorelist_config:
|
||||
return {}
|
||||
if not isinstance(ignorelist_file, str):
|
||||
if not isinstance(ignorelist_config, dict):
|
||||
raise UserError(
|
||||
"Invalid value for lastgenre.ignorelist: expected path string "
|
||||
f"or 'no', got {ignorelist_file!r}"
|
||||
"Invalid value for lastgenre.ignorelist: expected a mapping "
|
||||
f"(artist \u2192 list of patterns), got {ignorelist_config!r}"
|
||||
)
|
||||
|
||||
self._log.debug("Loading ignorelist file {}", ignorelist_file)
|
||||
section = None
|
||||
try:
|
||||
with Path(ignorelist_file).expanduser().open(encoding="utf-8") as f:
|
||||
for lineno, line in enumerate(f, 1):
|
||||
if not line.strip() or line.lstrip().startswith("#"):
|
||||
continue
|
||||
if not line.startswith(" "):
|
||||
# Section header
|
||||
header = line.strip().lower()
|
||||
if not header.endswith(":"):
|
||||
raise UserError(
|
||||
f"Malformed ignorelist section header "
|
||||
f"at line {lineno}"
|
||||
)
|
||||
section = header[:-1].rstrip()
|
||||
if not section:
|
||||
raise UserError(
|
||||
f"Empty ignorelist section name "
|
||||
f"at line {lineno}"
|
||||
)
|
||||
else:
|
||||
# Pattern line: must be indented (at least one space)
|
||||
if section is None:
|
||||
raise UserError(
|
||||
f"Ignorelist pattern line before any section header "
|
||||
f"at line {lineno}"
|
||||
)
|
||||
ignorelist_raw[section].append(line.strip())
|
||||
except OSError as exc:
|
||||
raise UserError(
|
||||
f"Cannot read ignorelist file {ignorelist_file!r}: {exc}"
|
||||
) from exc
|
||||
ignorelist_raw: RawIgnorelist = {}
|
||||
for artist, patterns in ignorelist_config.items():
|
||||
if not isinstance(patterns, list):
|
||||
raise UserError(
|
||||
f"Invalid lastgenre.ignorelist entry for {artist!r}: "
|
||||
f"expected a list of patterns, got {patterns!r}"
|
||||
)
|
||||
ignorelist_raw[str(artist).lower()] = [str(p) for p in patterns]
|
||||
|
||||
self._log.extra_debug("Ignorelist: {}", ignorelist_raw)
|
||||
return self._compile_ignorelist_patterns(ignorelist_raw)
|
||||
|
||||
|
|
@ -561,6 +540,9 @@ class LastGenrePlugin(plugins.BeetsPlugin):
|
|||
)
|
||||
if new_genres:
|
||||
stage_label = "multi-valued album artist"
|
||||
stage_artist = (
|
||||
None # Already filtered per-artist in client
|
||||
)
|
||||
else:
|
||||
# For "Various Artists", pick the most popular track genre.
|
||||
item_genres = []
|
||||
|
|
|
|||
|
|
@ -14,14 +14,13 @@
|
|||
|
||||
"""Tests for the 'lastgenre' plugin."""
|
||||
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from collections import defaultdict
|
||||
from unittest.mock import Mock, patch
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from beets.library import Album
|
||||
from beets.test import _common
|
||||
from beets.test.helper import IOMixin, PluginTestCase
|
||||
from beets.ui import UserError
|
||||
|
|
@ -759,7 +758,7 @@ def test_ignorelist_patterns(
|
|||
):
|
||||
"""Test ignorelist pattern matching logic directly."""
|
||||
|
||||
# Disable file-based ignorelist to avoid depending on global config state.
|
||||
# Disable ignorelist to avoid depending on global config state.
|
||||
config["lastgenre"]["ignorelist"] = False
|
||||
|
||||
# Initialize plugin
|
||||
|
|
@ -783,7 +782,7 @@ def test_ignorelist_literal_fallback_uses_fullmatch(config):
|
|||
full-match semantics: the pattern must equal the entire genre string,
|
||||
not just appear as a substring.
|
||||
"""
|
||||
# Disable file-based ignorelist to avoid depending on global config state.
|
||||
# Disable ignorelist to avoid depending on global config state.
|
||||
config["lastgenre"]["ignorelist"] = False
|
||||
plugin = lastgenre.LastGenrePlugin()
|
||||
# "[not valid regex" is not valid regex, so _compile_ignorelist_patterns
|
||||
|
|
@ -809,85 +808,135 @@ def test_ignorelist_literal_fallback_uses_fullmatch(config):
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"file_content, expected_ignorelist",
|
||||
"ignorelist_config, expected_ignorelist",
|
||||
[
|
||||
# Basic artist with pattern
|
||||
("metallica:\n metal", {"metallica": ["metal"]}),
|
||||
# Global ignorelist
|
||||
("*:\n spoken word", {"*": ["spoken word"]}),
|
||||
# Basic artist with single pattern
|
||||
({"metallica": ["metal"]}, {"metallica": ["metal"]}),
|
||||
# Global ignorelist with '*' key
|
||||
({"*": ["spoken word"]}, {"*": ["spoken word"]}),
|
||||
# Multiple patterns per artist
|
||||
(
|
||||
"metallica:\n metal\n .*rock.*",
|
||||
{"metallica": ["metal", ".*rock.*"]},
|
||||
{"metallica": ["metal", ".*rock.*"]},
|
||||
),
|
||||
# Comments and empty lines skipped
|
||||
# Combined global and artist-specific
|
||||
(
|
||||
"# comment\n*:\n spoken word\n\nmetallica:\n metal",
|
||||
{"*": ["spoken word"], "metallica": ["metal"]},
|
||||
{"*": ["spoken word"], "metallica": ["metal"]},
|
||||
),
|
||||
# Case insensitive artist names — key lowercased, pattern kept as-is
|
||||
# Artist names are lowercased; patterns are kept as-is
|
||||
# (patterns compiled with re.IGNORECASE so case doesn't matter for matching)
|
||||
("METALLICA:\n METAL", {"metallica": ["METAL"]}),
|
||||
# Invalid regex pattern that gets escaped
|
||||
("artist:\n [invalid(regex", {"artist": ["\\[invalid\\(regex"]}),
|
||||
# Empty file
|
||||
("", {}),
|
||||
({"METALLICA": ["METAL"]}, {"metallica": ["METAL"]}),
|
||||
# Invalid regex pattern that gets escaped (full-match literal fallback)
|
||||
({"artist": ["[invalid(regex"]}, {"artist": ["\\[invalid\\(regex"]}),
|
||||
# Disabled via False / empty dict — both produce empty dict
|
||||
(False, {}),
|
||||
({}, {}),
|
||||
],
|
||||
)
|
||||
def test_ignorelist_file_format(config, file_content, expected_ignorelist):
|
||||
"""Test ignorelist file format parsing."""
|
||||
def test_ignorelist_config_format(
|
||||
config, ignorelist_config, expected_ignorelist
|
||||
):
|
||||
"""Test ignorelist parsing from beets config (dict-based)."""
|
||||
config["lastgenre"]["ignorelist"] = ignorelist_config
|
||||
plugin = lastgenre.LastGenrePlugin()
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
mode="w", suffix=".txt", delete=False, encoding="utf-8"
|
||||
) as f:
|
||||
f.write(file_content)
|
||||
ignorelist_file = f.name
|
||||
# Convert compiled regex patterns back to strings for comparison
|
||||
string_ignorelist = {
|
||||
artist: [p.pattern for p in patterns]
|
||||
for artist, patterns in plugin.ignorelist.items()
|
||||
}
|
||||
|
||||
try:
|
||||
config["lastgenre"]["ignorelist"] = ignorelist_file
|
||||
plugin = lastgenre.LastGenrePlugin()
|
||||
|
||||
# Convert compiled regex patterns back to strings for comparison
|
||||
string_ignorelist = {
|
||||
artist: [p.pattern for p in patterns]
|
||||
for artist, patterns in plugin.ignorelist.items()
|
||||
}
|
||||
|
||||
assert string_ignorelist == expected_ignorelist
|
||||
|
||||
finally:
|
||||
os.unlink(ignorelist_file)
|
||||
assert string_ignorelist == expected_ignorelist
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"invalid_content, expected_error_message",
|
||||
"invalid_config, expected_error_message",
|
||||
[
|
||||
# Missing colon
|
||||
("metallica\n metal", "Malformed ignorelist section header"),
|
||||
# Pattern before section
|
||||
(" metal\nmetallica:\n heavy metal", "before any section header"),
|
||||
# Unindented pattern
|
||||
("metallica:\nmetal", "Malformed ignorelist section header"),
|
||||
# A plain string (e.g. accidental file path) instead of a mapping
|
||||
(
|
||||
"/path/to/ignorelist.txt",
|
||||
"expected a mapping",
|
||||
),
|
||||
# An integer instead of a mapping
|
||||
(
|
||||
42,
|
||||
"expected a mapping",
|
||||
),
|
||||
# A list of strings instead of a mapping
|
||||
(
|
||||
["spoken word", "comedy"],
|
||||
"expected a mapping",
|
||||
),
|
||||
# A mapping with a non-list value
|
||||
(
|
||||
{"metallica": "metal"},
|
||||
"expected a list of patterns",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_ignorelist_file_format_errors(
|
||||
config, invalid_content, expected_error_message
|
||||
def test_ignorelist_config_format_errors(
|
||||
config, invalid_config, expected_error_message
|
||||
):
|
||||
"""Test ignorelist file format error handling."""
|
||||
"""Test ignorelist config validation error handling."""
|
||||
config["lastgenre"]["ignorelist"] = invalid_config
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
mode="w", suffix=".txt", delete=False, encoding="utf-8"
|
||||
) as f:
|
||||
f.write(invalid_content)
|
||||
ignorelist_file = f.name
|
||||
with pytest.raises(UserError) as exc_info:
|
||||
lastgenre.LastGenrePlugin()
|
||||
|
||||
try:
|
||||
config["lastgenre"]["ignorelist"] = ignorelist_file
|
||||
assert expected_error_message in str(exc_info.value)
|
||||
|
||||
with pytest.raises(UserError) as exc_info:
|
||||
lastgenre.LastGenrePlugin()
|
||||
|
||||
assert expected_error_message in str(exc_info.value)
|
||||
def test_ignorelist_multivalued_album_artist_fallback(config):
|
||||
"""Verify ignorelist filtering for multi-valued album artist fallbacks.
|
||||
|
||||
finally:
|
||||
os.unlink(ignorelist_file)
|
||||
Genres already filtered for individual artists should not be dropped
|
||||
due to a secondary (incorrect) check against the combined group artist.
|
||||
"""
|
||||
# Setup config: ignore 'Metal' for 'Artist A' and 'Artist Group'.
|
||||
config["lastgenre"]["ignorelist"] = {
|
||||
"Artist A": ["Metal"],
|
||||
"Artist Group": ["Metal"],
|
||||
}
|
||||
# No whitelist and larger count to keep it simple.
|
||||
config["lastgenre"]["whitelist"] = False
|
||||
config["lastgenre"]["count"] = 10
|
||||
|
||||
plugin = lastgenre.LastGenrePlugin()
|
||||
plugin.setup()
|
||||
|
||||
# Mock album object.
|
||||
obj = MagicMock(spec=Album)
|
||||
obj.albumartist = "Artist Group"
|
||||
obj.album = "Album Title"
|
||||
obj.albumartists = ["Artist A", "Artist B"]
|
||||
obj.get.return_value = [] # no existing genres
|
||||
|
||||
# Mock client and its artist lookups.
|
||||
# We must ensure it doesn't resolve at track or album stage.
|
||||
plugin.client = MagicMock()
|
||||
plugin.client.fetch_track_genre.return_value = []
|
||||
plugin.client.fetch_album_genre.return_value = []
|
||||
|
||||
# Artist lookup side effect:
|
||||
# Artist A: Returns 'Metal' and 'Rock'.
|
||||
# (Note: Client should have filtered 'Metal' already, so we simulate that by
|
||||
# returning only 'Rock').
|
||||
# Artist B: Returns 'Metal' and 'Jazz'.
|
||||
# Artist Group: Returns nothing (triggers fallback).
|
||||
def mock_fetch_artist(artist):
|
||||
if artist == "Artist A":
|
||||
return ["Rock"]
|
||||
if artist == "Artist B":
|
||||
return ["Metal", "Jazz"]
|
||||
return []
|
||||
|
||||
plugin.client.fetch_artist_genre.side_effect = mock_fetch_artist
|
||||
|
||||
# Note: manually triggering the logic in _get_genre.
|
||||
genres, label = plugin._get_genre(obj)
|
||||
|
||||
assert "multi-valued album artist" in label
|
||||
assert "Rock" in genres
|
||||
assert "Metal" in genres # MUST survive because Artist B allowed it
|
||||
assert "Jazz" in genres
|
||||
|
|
|
|||
Loading…
Reference in a new issue