mirror of
https://github.com/beetbox/beets.git
synced 2025-12-16 05:34:47 +01:00
clean up genre canonicalization (#264)
- Canonicalization is disabled by default. (This prevents pyyaml from being a dependency if you don't use canonicalization.) - Config value to set the tree file. - Python style. - Added YAML file to MANIFEST.in. - Documentation.
This commit is contained in:
parent
9fd10c0186
commit
c03fb658c7
3 changed files with 54 additions and 24 deletions
|
|
@ -19,6 +19,7 @@ recursive-include beetsplug/web/static *
|
|||
|
||||
# And for the lastgenre plugin.
|
||||
include beetsplug/lastgenre/genres.txt
|
||||
include beetsplug/lastgenre/genres-tree.yaml
|
||||
|
||||
# Exclude junk.
|
||||
global-exclude .DS_Store
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ from __future__ import with_statement
|
|||
import logging
|
||||
import pylast
|
||||
import os
|
||||
from yaml import load
|
||||
|
||||
from beets import plugins
|
||||
from beets import ui
|
||||
|
|
@ -69,36 +68,39 @@ def _tags_to_genre(tags):
|
|||
elif not options['whitelist']:
|
||||
return tags[0].title()
|
||||
|
||||
for tag in tags:
|
||||
genre = find_allowed(
|
||||
find_parents(tag.lower(), options['branches']))
|
||||
if genre:
|
||||
return genre
|
||||
|
||||
return None
|
||||
if options.get('c14n'):
|
||||
# Use the canonicalization tree.
|
||||
for tag in tags:
|
||||
genre = find_allowed(find_parents(tag, options['branches']))
|
||||
if genre:
|
||||
return genre
|
||||
else:
|
||||
# Just use the flat whitelist.
|
||||
return find_allowed(tags)
|
||||
|
||||
def flatten_tree(elem, path, branches):
|
||||
"""Flatten nested lists/dictionaries into lists of strings (branches).
|
||||
"""Flatten nested lists/dictionaries into lists of strings
|
||||
(branches).
|
||||
"""
|
||||
if not path:
|
||||
path = []
|
||||
|
||||
if isinstance(elem, dict):
|
||||
for (k, v) in elem.items() :
|
||||
for (k, v) in elem.items():
|
||||
flatten_tree(v, path + [k], branches)
|
||||
elif isinstance(elem, list):
|
||||
for sub in elem:
|
||||
flatten_tree(sub, path, branches)
|
||||
else:
|
||||
branches.append(path + [elem])
|
||||
branches.append(path + [unicode(elem)])
|
||||
|
||||
def find_parents(candidate, branches):
|
||||
"""Find parents genre of a given genre, ordered from the closest to the
|
||||
further parent.
|
||||
"""Find parents genre of a given genre, ordered from the closest to
|
||||
the further parent.
|
||||
"""
|
||||
for branch in branches:
|
||||
try:
|
||||
idx = branch.index(candidate)
|
||||
idx = branch.index(candidate.lower())
|
||||
return list(reversed(branch[:idx+1]))
|
||||
except ValueError:
|
||||
continue
|
||||
|
|
@ -108,13 +110,15 @@ def find_allowed(genres):
|
|||
"""Returns the first genre that is present in the genre whitelist or
|
||||
None if no genre is suitable.
|
||||
"""
|
||||
for g in list(genres):
|
||||
if g in options['whitelist']:
|
||||
return g.title()
|
||||
for genre in list(genres):
|
||||
if genre.lower() in options['whitelist']:
|
||||
return genre.title()
|
||||
return None
|
||||
|
||||
options = {
|
||||
'whitelist': None,
|
||||
'branches': None,
|
||||
'c14n': False,
|
||||
}
|
||||
class LastGenrePlugin(plugins.BeetsPlugin):
|
||||
def configure(self, config):
|
||||
|
|
@ -134,12 +138,20 @@ class LastGenrePlugin(plugins.BeetsPlugin):
|
|||
whitelist.add(line)
|
||||
options['whitelist'] = whitelist
|
||||
|
||||
# Read the genres tree for canonicalization
|
||||
genres_tree = load(open(C14N_TREE, 'r'))
|
||||
branches = []
|
||||
flatten_tree(genres_tree, [], branches)
|
||||
options['branches'] = branches
|
||||
# Read the genres tree for canonicalization if enabled.
|
||||
c14n_filename = ui.config_val(config, 'lastgenre', 'canonical', None)
|
||||
if c14n_filename is not None:
|
||||
c14n_filename = c14n_filename.strip()
|
||||
if not c14n_filename:
|
||||
c14n_filename = C14N_TREE
|
||||
c14n_filename = normpath(c14n_filename)
|
||||
|
||||
from yaml import load
|
||||
genres_tree = load(open(c14n_filename, 'r'))
|
||||
branches = []
|
||||
flatten_tree(genres_tree, [], branches)
|
||||
options['branches'] = branches
|
||||
options['c14n'] = True
|
||||
|
||||
@LastGenrePlugin.listen('album_imported')
|
||||
def album_imported(lib, album):
|
||||
|
|
|
|||
|
|
@ -31,10 +31,27 @@ configuration value::
|
|||
whitelist: /path/to/genres.txt
|
||||
|
||||
The genre list file should contain one genre per line. Blank lines are ignored.
|
||||
|
||||
For the curious, the default genre list is generated by a `script that scrapes
|
||||
Wikipedia`_.
|
||||
|
||||
.. _pip: http://www.pip-installer.org/
|
||||
.. _pylast: http://code.google.com/p/pylast/
|
||||
.. _script that scrapes Wikipedia: https://gist.github.com/1241307
|
||||
|
||||
Canonicalization
|
||||
----------------
|
||||
|
||||
The plugin can also *canonicalize* genres, meaning that more obscure genres can
|
||||
be turned into coarser-grained ones that are present in the whitelist. This
|
||||
works using a tree of nested genre names, represented using `YAML`_, where the
|
||||
leaves of the tree represent the most specific genres.
|
||||
|
||||
To enable canonicalization, first install the `pyyaml`_ module (``pip install
|
||||
pyyaml``). Then set the ``canonical`` configuration value::
|
||||
|
||||
[lastgenre]
|
||||
canonical:
|
||||
|
||||
Leaving this value blank will use a built-in canonicalization tree. You can also
|
||||
set it to a path, just like the ``whitelist`` config value, to use your own
|
||||
tree.
|
||||
|
|
|
|||
Loading…
Reference in a new issue