diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index b26260440..e3af20769 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -22,7 +22,10 @@ The scraper script used is available here: https://gist.github.com/1241307 """ +import codecs +import configparser import os +import re import traceback from functools import wraps from pathlib import Path @@ -33,7 +36,7 @@ import yaml from beets import config, library, plugins, ui from beets.library import Album, Item -from beets.util import plurality, unique_list +from beets.util import plurality, unique_list, normpath LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY) @@ -103,6 +106,7 @@ def log_and_pretend(apply_func): WHITELIST = os.path.join(os.path.dirname(__file__), "genres.txt") C14N_TREE = os.path.join(os.path.dirname(__file__), "genres-tree.yaml") +ALIASES = os.path.join(os.path.dirname(__file__), "aliases.ini") class LastGenrePlugin(plugins.BeetsPlugin): @@ -125,6 +129,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): "title_case": True, "extended_debug": False, "pretend": False, + "aliases": True, } ) self.setup() @@ -137,6 +142,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): self._genre_cache = {} self.whitelist = self._load_whitelist() self.c14n_branches, self.canonicalize = self._load_c14n_tree() + self.aliases = self._load_aliases() def _load_whitelist(self) -> set[str]: """Load the whitelist from a text file. @@ -179,6 +185,35 @@ class LastGenrePlugin(plugins.BeetsPlugin): flatten_tree(genres_tree, [], c14n_branches) return c14n_branches, canonicalize + def _load_aliases(self) -> list[str]: + """Load configured aliases for a list of tags.""" + aliases = [] + aliases_filename = self.config["aliases"].get() + if aliases_filename in ( + True, + "", + None, + ): # Indicates the default aliases file. + aliases_filename = ALIASES + if aliases_filename: + aliases_filename = normpath(aliases_filename) + try: + config_parser = configparser.ConfigParser() + config_parser.read(aliases_filename, encoding="utf-8") + for section in config_parser.sections(): + aliases.extend( + {pattern: replacement} + for pattern, replacement in config_parser[ + section + ].items() + ) + self._log.debug( + "Loaded genre aliases from {0}", aliases_filename + ) + except Exception as exc: + self._log.error("Error loading aliases file: {0}", exc) + return aliases + @property def sources(self) -> tuple[str, ...]: """A tuple of allowed genre sources. May contain 'track', @@ -289,6 +324,25 @@ class LastGenrePlugin(plugins.BeetsPlugin): return True return False + def _apply_aliases(self, genres): + """Apply regex aliases to the genre list.""" + if not self.aliases or not genres: + return genres + + result = genres.copy() + for alias_pair in self.aliases: + for pattern, replacement in alias_pair.items(): + try: + result = [ + re.sub(pattern, replacement, genre, flags=re.IGNORECASE) + for genre in result + ] + except re.error as exc: + self._log.error( + "Regex error in pattern '{0}': {1}", pattern, exc + ) + return result + # Cached last.fm entity lookups. def _last_lookup(self, entity, method, *args): @@ -312,6 +366,11 @@ class LastGenrePlugin(plugins.BeetsPlugin): genre = self._genre_cache[key] if self.config["extended_debug"]: self._log.debug("last.fm (unfiltered) {} tags: {}", entity, genre) + + genre = self._apply_aliases(genre) + if self.config["extended_debug"]: + self._log.debug("last.fm (aliased) {} tags: {}", entity, genre) + return genre def fetch_album_genre(self, obj): @@ -354,6 +413,10 @@ class LastGenrePlugin(plugins.BeetsPlugin): else: item_genre = obj.get("genre").split(separator) + item_genre = self._apply_aliases(item_genre) + if self.config["extended_debug"]: + self._log.debug("Existing genres (aliased): {}", item_genre) + # Filter out empty strings return [g for g in item_genre if g] diff --git a/beetsplug/lastgenre/aliases.ini b/beetsplug/lastgenre/aliases.ini new file mode 100644 index 000000000..6c0b38ae6 --- /dev/null +++ b/beetsplug/lastgenre/aliases.ini @@ -0,0 +1,31 @@ +[regexaliases] +# Common genres with variable spelling +(?i)electronic[\s-]?music? = Electronic +(?i)alt[\s-]?rock|alternative(\srock)?|alternative|alt = Alternative Rock +(?i)hip[\s-]?hop|hiphop|rap = Hip Hop +down[ /-]*(beat|tempo?) = downtempo + +# Common genres with ampersands +(?i)r[\s&]*b|rhythm and blues = R&B +d(rum)?[\s]*(&|and|n)?[\s]*b(ass)? = Drum And Bass +drill[ n/]*bass = drill & bass +r(hythm)?[ n/]*b(lues)? = rhythm & blues +rock[ n/]*roll = rock & roll + +# Consistent delimiters +(c|k|j)[ /]*(folk|goth|hip hop|pop|rock|ska|trance) = \g<1>-\g<2> +(euro(?!p[ae]+n?)|neo|post|tech(?!n[io]))[ /]*(\w+) = \g<1>-\g<2> +(g?lo)[ /]*fi = \g<1>-fi +(glitch|hip|jazz|trip)y?([ /]*hip)?[ /]*hop = \g<1> hop +(p|g)[ /]*funk = \g<1>-funk +nu[/-]*(disco|jazz|metal|soul) = nu \g<1> +synth[ /-]+(\w+) = synth\g<1> + +# Abbreviation related +alt(/|er*n(/|ati[fv](es?)?))? = alternative +avant(/|gard)? = avantgarde +prog+(/|r*es+(/|i[fv]e?)?)? = progressive +ps?y(/|ch(/|[aeo]l?del+i[ca]+)?)? = psychedelic +trad(/|ition(/|al)?)?-? = traditional + +