From de2bdfa4807b676d814c0ddbc4be6e1e18d25082 Mon Sep 17 00:00:00 2001
From: J0J0 Todos
Date: Sat, 19 Apr 2025 08:24:07 +0200
Subject: [PATCH] lastgenre: Load and parse blacklist file

Uses a custom text file format since YAML, INI, TOML, ... all have their
flaws with parsing regex patterns.
---
 beetsplug/lastgenre/__init__.py | 81 +++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py
index 8c09eefea..934d793a5 100644
--- a/beetsplug/lastgenre/__init__.py
+++ b/beetsplug/lastgenre/__init__.py
@@ -24,6 +24,7 @@ https://gist.github.com/1241307
 
 import os
 import traceback
+from collections import defaultdict
 from pathlib import Path
 from typing import Union
 
@@ -32,6 +33,7 @@ import yaml
 
 from beets import config, library, plugins, ui
 from beets.library import Album, Item
+from beets.ui import UserError
 from beets.util import plurality, unique_list
 
 LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY)
@@ -101,6 +103,7 @@ class LastGenrePlugin(plugins.BeetsPlugin):
                 "prefer_specific": False,
                 "title_case": True,
                 "extended_debug": False,
+                "blacklist": False,
             }
         )
         self.setup()
@@ -113,6 +116,7 @@ class LastGenrePlugin(plugins.BeetsPlugin):
         self._genre_cache = {}
         self.whitelist = self._load_whitelist()
         self.c14n_branches, self.canonicalize = self._load_c14n_tree()
+        self.blacklist = self._load_blacklist()
 
     def _load_whitelist(self) -> set[str]:
         """Load the whitelist from a text file.
@@ -155,6 +159,83 @@ class LastGenrePlugin(plugins.BeetsPlugin):
             flatten_tree(genres_tree, [], c14n_branches)
         return c14n_branches, canonicalize
 
+    @staticmethod
+    def _lower_outside_escapes(pattern: str) -> str:
+        r"""Lowercase a regex pattern but keep escape sequences intact.
+
+        The character following a backslash is copied verbatim because its
+        case is significant in regex syntax: ``\S`` is the negation of
+        ``\s``, and likewise for ``\W``, ``\D`` and ``\B``.
+        """
+        chars = []
+        escaped = False
+        for char in pattern:
+            chars.append(char if escaped else char.lower())
+            # A backslash starts an escape unless it is itself escaped.
+            escaped = char == "\\" and not escaped
+        return "".join(chars)
+
+    def _load_blacklist(self) -> dict[str, list[str]]:
+        """Load the blacklist from a configured file path.
+
+        For maximum compatibility with regex patterns, a custom format is used:
+        - Each section starts with an artist name, followed by a colon.
+        - Subsequent lines are indented (at least one space or tab, typically
+          4 spaces) and contain a regex pattern to match a genre.
+        - Blank lines and lines starting with ``#`` are ignored.
+
+        E.g.:
+            artist name 1:
+                genre pattern 1
+                genre pattern 2
+            artist name 2:
+                genre pattern 3
+
+        Artist names and the literal text of patterns are lowercased; regex
+        escape sequences keep their case.
+
+        Returns:
+            A ``defaultdict`` mapping each artist name to its list of genre
+            patterns; empty when no blacklist file is configured.
+
+        Raises:
+            UserError: if the file format is invalid.
+        """
+        blacklist: dict[str, list[str]] = defaultdict(list)
+        if not (bl_filename := self.config["blacklist"].get()):
+            return blacklist
+
+        self._log.debug("Loading blacklist file {0}", bl_filename)
+        section = None
+        with Path(bl_filename).expanduser().open(encoding="utf-8") as f:
+            for lineno, line in enumerate(f, 1):
+                content = line.strip()
+                if not content or content.startswith("#"):
+                    continue
+                if not line[0].isspace():
+                    # Section header: "<artist name>:"
+                    if not content.endswith(":"):
+                        raise UserError(
+                            f"Malformed blacklist section header "
+                            f"at line {lineno}: {content}"
+                        )
+                    # Drop only the terminating colon; whitespace around it
+                    # is not part of the artist name.
+                    section = content[:-1].rstrip().lower()
+                else:
+                    # Pattern line: must be indented (space or tab)
+                    if section is None:
+                        raise UserError(
+                            f"Blacklist regex pattern line before any section header "
+                            f"at line {lineno}: {content}"
+                        )
+                    blacklist[section].append(
+                        self._lower_outside_escapes(content)
+                    )
+        if self.config["extended_debug"]:
+            self._log.debug("Blacklist: {}", blacklist)
+        return blacklist
+
     @property
     def sources(self) -> tuple[str, ...]:
         """A tuple of allowed genre sources. May contain 'track',