lastgenre_aliases

- Use the INI format (since YAML and regex is a nightmare...)
- Ship a default aliases file with beets
  - Dont use non-capturing groups use ?: because colons are a problem.
    Also some replacement patterns were bad. Fixed
- Implement loading the aliases.ini file similar to how the whitelist
  file is handled.
- Apply aliases in two places, right after last.fm fetch and after
  collecting existing genres.
This commit is contained in:
J0J0 Todos 2025-04-26 08:14:58 +02:00
parent 0a26202a1d
commit 6e28672474
2 changed files with 95 additions and 1 deletions

View file

@ -22,7 +22,10 @@ The scraper script used is available here:
https://gist.github.com/1241307
"""
import codecs
import configparser
import os
import re
import traceback
from functools import wraps
from pathlib import Path
@ -33,7 +36,7 @@ import yaml
from beets import config, library, plugins, ui
from beets.library import Album, Item
from beets.util import plurality, unique_list
from beets.util import plurality, unique_list, normpath
LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY)
@ -103,6 +106,7 @@ def log_and_pretend(apply_func):
WHITELIST = os.path.join(os.path.dirname(__file__), "genres.txt")
C14N_TREE = os.path.join(os.path.dirname(__file__), "genres-tree.yaml")
ALIASES = os.path.join(os.path.dirname(__file__), "aliases.ini")
class LastGenrePlugin(plugins.BeetsPlugin):
@ -125,6 +129,7 @@ class LastGenrePlugin(plugins.BeetsPlugin):
"title_case": True,
"extended_debug": False,
"pretend": False,
"aliases": True,
}
)
self.setup()
@ -137,6 +142,7 @@ class LastGenrePlugin(plugins.BeetsPlugin):
self._genre_cache = {}
self.whitelist = self._load_whitelist()
self.c14n_branches, self.canonicalize = self._load_c14n_tree()
self.aliases = self._load_aliases()
def _load_whitelist(self) -> set[str]:
"""Load the whitelist from a text file.
@ -179,6 +185,35 @@ class LastGenrePlugin(plugins.BeetsPlugin):
flatten_tree(genres_tree, [], c14n_branches)
return c14n_branches, canonicalize
def _load_aliases(self) -> list[str]:
"""Load configured aliases for a list of tags."""
aliases = []
aliases_filename = self.config["aliases"].get()
if aliases_filename in (
True,
"",
None,
): # Indicates the default aliases file.
aliases_filename = ALIASES
if aliases_filename:
aliases_filename = normpath(aliases_filename)
try:
config_parser = configparser.ConfigParser()
config_parser.read(aliases_filename, encoding="utf-8")
for section in config_parser.sections():
aliases.extend(
{pattern: replacement}
for pattern, replacement in config_parser[
section
].items()
)
self._log.debug(
"Loaded genre aliases from {0}", aliases_filename
)
except Exception as exc:
self._log.error("Error loading aliases file: {0}", exc)
return aliases
@property
def sources(self) -> tuple[str, ...]:
"""A tuple of allowed genre sources. May contain 'track',
@ -289,6 +324,25 @@ class LastGenrePlugin(plugins.BeetsPlugin):
return True
return False
def _apply_aliases(self, genres):
"""Apply regex aliases to the genre list."""
if not self.aliases or not genres:
return genres
result = genres.copy()
for alias_pair in self.aliases:
for pattern, replacement in alias_pair.items():
try:
result = [
re.sub(pattern, replacement, genre, flags=re.IGNORECASE)
for genre in result
]
except re.error as exc:
self._log.error(
"Regex error in pattern '{0}': {1}", pattern, exc
)
return result
# Cached last.fm entity lookups.
def _last_lookup(self, entity, method, *args):
@ -312,6 +366,11 @@ class LastGenrePlugin(plugins.BeetsPlugin):
genre = self._genre_cache[key]
if self.config["extended_debug"]:
self._log.debug("last.fm (unfiltered) {} tags: {}", entity, genre)
genre = self._apply_aliases(genre)
if self.config["extended_debug"]:
self._log.debug("last.fm (aliased) {} tags: {}", entity, genre)
return genre
def fetch_album_genre(self, obj):
@ -354,6 +413,10 @@ class LastGenrePlugin(plugins.BeetsPlugin):
else:
item_genre = obj.get("genre").split(separator)
item_genre = self._apply_aliases(item_genre)
if self.config["extended_debug"]:
self._log.debug("Existing genres (aliased): {}", item_genre)
# Filter out empty strings
return [g for g in item_genre if g]

View file

@ -0,0 +1,31 @@
[regexaliases]
# Common genres with variable spelling
(?i)electronic[\s-]?music? = Electronic
(?i)alt[\s-]?rock|alternative(\srock)?|alternative|alt = Alternative Rock
(?i)hip[\s-]?hop|hiphop|rap = Hip Hop
down[ /-]*(beat|tempo?) = downtempo
# Common genres with ampersands
(?i)r[\s&]*b|rhythm and blues = R&B
d(rum)?[\s]*(&|and|n)?[\s]*b(ass)? = Drum And Bass
drill[ n/]*bass = drill & bass
r(hythm)?[ n/]*b(lues)? = rhythm & blues
rock[ n/]*roll = rock & roll
# Consistent delimiters
(c|k|j)[ /]*(folk|goth|hip hop|pop|rock|ska|trance) = \g<1>-\g<2>
(euro(?!p[ae]+n?)|neo|post|tech(?!n[io]))[ /]*(\w+) = \g<1>-\g<2>
(g?lo)[ /]*fi = \g<1>-fi
(glitch|hip|jazz|trip)y?([ /]*hip)?[ /]*hop = \g<1> hop
(p|g)[ /]*funk = \g<1>-funk
nu[/-]*(disco|jazz|metal|soul) = nu \g<1>
synth[ /-]+(\w+) = synth\g<1>
# Abbreviation related
alt(/|er*n(/|ati[fv](es?)?))? = alternative
avant(/|gard)? = avantgarde
prog+(/|r*es+(/|i[fv]e?)?)? = progressive
ps?y(/|ch(/|[aeo]l?del+i[ca]+)?)? = psychedelic
trad(/|ition(/|al)?)?-? = traditional