diff --git a/MANIFEST.in b/MANIFEST.in index bedad317a..560ef63db 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -19,6 +19,7 @@ recursive-include beetsplug/web/static * # And for the lastgenre plugin. include beetsplug/lastgenre/genres.txt +include beetsplug/lastgenre/genres-tree.yaml # Exclude junk. global-exclude .DS_Store diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 59a688227..c228b5cbd 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -29,7 +29,6 @@ from __future__ import with_statement import logging import pylast import os -from yaml import load from beets import plugins from beets import ui @@ -69,36 +68,39 @@ def _tags_to_genre(tags): elif not options['whitelist']: return tags[0].title() - for tag in tags: - genre = find_allowed( - find_parents(tag.lower(), options['branches'])) - if genre: - return genre - - return None + if options.get('c14n'): + # Use the canonicalization tree. + for tag in tags: + genre = find_allowed(find_parents(tag, options['branches'])) + if genre: + return genre + else: + # Just use the flat whitelist. + return find_allowed(tags) def flatten_tree(elem, path, branches): - """Flatten nested lists/dictionaries into lists of strings (branches). + """Flatten nested lists/dictionaries into lists of strings + (branches). """ if not path: path = [] if isinstance(elem, dict): - for (k, v) in elem.items() : + for (k, v) in elem.items(): flatten_tree(v, path + [k], branches) elif isinstance(elem, list): for sub in elem: flatten_tree(sub, path, branches) else: - branches.append(path + [elem]) + branches.append(path + [unicode(elem)]) def find_parents(candidate, branches): - """Find parents genre of a given genre, ordered from the closest to the - further parent. + """Find parents genre of a given genre, ordered from the closest to + the further parent. """ for branch in branches: try: - idx = branch.index(candidate) + idx = branch.index(candidate.lower()) return list(reversed(branch[:idx+1])) except ValueError: continue @@ -108,13 +110,15 @@ def find_allowed(genres): """Returns the first genre that is present in the genre whitelist or None if no genre is suitable. """ - for g in list(genres): - if g in options['whitelist']: - return g.title() + for genre in list(genres): + if genre.lower() in options['whitelist']: + return genre.title() return None options = { 'whitelist': None, + 'branches': None, + 'c14n': False, } class LastGenrePlugin(plugins.BeetsPlugin): def configure(self, config): @@ -134,12 +138,20 @@ class LastGenrePlugin(plugins.BeetsPlugin): whitelist.add(line) options['whitelist'] = whitelist - # Read the genres tree for canonicalization - genres_tree = load(open(C14N_TREE, 'r')) - branches = [] - flatten_tree(genres_tree, [], branches) - options['branches'] = branches - + # Read the genres tree for canonicalization if enabled. + c14n_filename = ui.config_val(config, 'lastgenre', 'canonical', None) + if c14n_filename is not None: + c14n_filename = c14n_filename.strip() + if not c14n_filename: + c14n_filename = C14N_TREE + c14n_filename = normpath(c14n_filename) + + from yaml import load + genres_tree = load(open(c14n_filename, 'r')) + branches = [] + flatten_tree(genres_tree, [], branches) + options['branches'] = branches + options['c14n'] = True @LastGenrePlugin.listen('album_imported') def album_imported(lib, album): diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index 4197345d0..9699701cc 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -31,10 +31,27 @@ configuration value:: whitelist: /path/to/genres.txt The genre list file should contain one genre per line. Blank lines are ignored. - For the curious, the default genre list is generated by a `script that scrapes Wikipedia`_. .. _pip: http://www.pip-installer.org/ .. _pylast: http://code.google.com/p/pylast/ .. _script that scrapes Wikipedia: https://gist.github.com/1241307 + +Canonicalization +---------------- + +The plugin can also *canonicalize* genres, meaning that more obscure genres can +be turned into coarser-grained ones that are present in the whitelist. This +works using a tree of nested genre names, represented using `YAML`_, where the +leaves of the tree represent the most specific genres. + +To enable canonicalization, first install the `pyyaml`_ module (``pip install +pyyaml``). Then set the ``canonical`` configuration value:: + + [lastgenre] + canonical: + +Leaving this value blank will use a built-in canonicalization tree. You can also +set it to a path, just like the ``whitelist`` config value, to use your own +tree.