clean up genre canonicalization (#264)

- Canonicalization is disabled by default. (This prevents pyyaml from being a
  dependency if you don't use canonicalization.)
- Config value to set the tree file.
- Python style.
- Added YAML file to MANIFEST.in.
- Documentation.
This commit is contained in:
Adrian Sampson 2011-12-01 12:14:11 -08:00
parent 9fd10c0186
commit c03fb658c7
3 changed files with 54 additions and 24 deletions

View file

@ -19,6 +19,7 @@ recursive-include beetsplug/web/static *
# And for the lastgenre plugin.
include beetsplug/lastgenre/genres.txt
include beetsplug/lastgenre/genres-tree.yaml
# Exclude junk.
global-exclude .DS_Store

View file

@ -29,7 +29,6 @@ from __future__ import with_statement
import logging
import pylast
import os
from yaml import load
from beets import plugins
from beets import ui
@ -69,36 +68,39 @@ def _tags_to_genre(tags):
elif not options['whitelist']:
return tags[0].title()
for tag in tags:
genre = find_allowed(
find_parents(tag.lower(), options['branches']))
if genre:
return genre
return None
if options.get('c14n'):
# Use the canonicalization tree.
for tag in tags:
genre = find_allowed(find_parents(tag, options['branches']))
if genre:
return genre
else:
# Just use the flat whitelist.
return find_allowed(tags)
def flatten_tree(elem, path, branches):
    """Flatten nested lists/dictionaries into lists of strings
    (branches).

    Walks `elem` recursively and records one branch per leaf value.

    elem: the (sub)tree to walk. A dict contributes each key as a path
        component and recurses into the value; a list recurses into
        every item without extending the path; anything else is a leaf.
    path: list of components leading to `elem`. Any falsy value
        (including None) is treated as an empty path.
    branches: output list, mutated in place. Each appended entry is
        `path` followed by the leaf converted to a text string.

    Returns None; results are delivered through `branches`.
    """
    # The leaf conversion uses `unicode` on Python 2; fall back to `str`
    # where that builtin does not exist.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if not path:
        path = []

    if isinstance(elem, dict):
        for (k, v) in elem.items():
            # `path + [k]` builds a new list, so sibling subtrees never
            # share (or mutate) one another's path.
            flatten_tree(v, path + [k], branches)
    elif isinstance(elem, list):
        for sub in elem:
            flatten_tree(sub, path, branches)
    else:
        # Leaf: record exactly one branch, with the leaf as text.
        branches.append(path + [text_type(elem)])
def find_parents(candidate, branches):
"""Find parents genre of a given genre, ordered from the closest to the
further parent.
"""Find parents genre of a given genre, ordered from the closest to
the further parent.
"""
for branch in branches:
try:
idx = branch.index(candidate)
idx = branch.index(candidate.lower())
return list(reversed(branch[:idx+1]))
except ValueError:
continue
@ -108,13 +110,15 @@ def find_allowed(genres):
"""Returns the first genre that is present in the genre whitelist or
None if no genre is suitable.
"""
for g in list(genres):
if g in options['whitelist']:
return g.title()
for genre in list(genres):
if genre.lower() in options['whitelist']:
return genre.title()
return None
# Module-level plugin settings, filled in when the plugin is configured
# and consulted when choosing a genre.
options = dict(
    # Collection of permitted genre names; None until a whitelist is
    # loaded.
    whitelist=None,
    # Flattened canonicalization tree (a list of branches); None until a
    # tree file is loaded.
    branches=None,
    # Whether genres are canonicalized through the tree; off by default.
    c14n=False,
)
class LastGenrePlugin(plugins.BeetsPlugin):
def configure(self, config):
@ -134,12 +138,20 @@ class LastGenrePlugin(plugins.BeetsPlugin):
whitelist.add(line)
options['whitelist'] = whitelist
# Read the genres tree for canonicalization
genres_tree = load(open(C14N_TREE, 'r'))
branches = []
flatten_tree(genres_tree, [], branches)
options['branches'] = branches
# Read the genres tree for canonicalization if enabled.
c14n_filename = ui.config_val(config, 'lastgenre', 'canonical', None)
if c14n_filename is not None:
c14n_filename = c14n_filename.strip()
if not c14n_filename:
c14n_filename = C14N_TREE
c14n_filename = normpath(c14n_filename)
from yaml import load
genres_tree = load(open(c14n_filename, 'r'))
branches = []
flatten_tree(genres_tree, [], branches)
options['branches'] = branches
options['c14n'] = True
@LastGenrePlugin.listen('album_imported')
def album_imported(lib, album):

View file

@ -31,10 +31,27 @@ configuration value::
whitelist: /path/to/genres.txt
The genre list file should contain one genre per line. Blank lines are ignored.
For the curious, the default genre list is generated by a `script that scrapes
Wikipedia`_.
.. _pip: http://www.pip-installer.org/
.. _pylast: http://code.google.com/p/pylast/
.. _script that scrapes Wikipedia: https://gist.github.com/1241307
Canonicalization
----------------
The plugin can also *canonicalize* genres, meaning that more obscure genres can
be turned into coarser-grained ones that are present in the whitelist. This
works using a tree of nested genre names, represented using `YAML`_, where the
leaves of the tree represent the most specific genres.
To enable canonicalization, first install the `pyyaml`_ module (``pip install
pyyaml``). Then set the ``canonical`` configuration value::
[lastgenre]
canonical:
Leaving this value blank will use a built-in canonicalization tree. You can also
set it to a path, just like the ``whitelist`` config value, to use your own
tree.