diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 6646e6af3..364766f16 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -15,11 +15,6 @@ """Gets genres for imported music based on Last.fm tags. Uses a provided whitelist file to determine which tags are valid genres. -The genre whitelist can be specified like so in .beetsconfig: - - [lastgenre] - whitelist=/path/to/genres.txt - The included (default) genre list was produced by scraping Wikipedia. The scraper script used is available here: https://gist.github.com/1241307 @@ -38,7 +33,6 @@ from beets import library log = logging.getLogger('beets') LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY) -C14N_TREE = os.path.join(os.path.dirname(__file__), 'genres-tree.yaml') PYLAST_EXCEPTIONS = ( pylast.WSError, @@ -49,10 +43,12 @@ PYLAST_EXCEPTIONS = ( # Core genre identification routine. -def _tags_for(obj): - """Given a pylast entity (album or track), returns a list of - tag names for that entity. Returns an empty list if the entity is +def _tags_for(obj, min_weight=None): + """Given a pylast entity (album or track), return a list of + tag names for that entity. Return an empty list if the entity is not found or another error occurs. + + If `min_weight` is specified, tags are filtered by weight. """ try: # Work around an inconsistency in pylast where @@ -66,66 +62,14 @@ def _tags_for(obj): log.debug(u'last.fm error: %s' % unicode(exc)) return [] - tags = [] - min_weight = config['lastgenre']['min_weight'].get(int) - count = config['lastgenre']['count'].get(int) + # Filter by weight (optionally). + if min_weight: + res = [el for el in res if (el.weight or 0) >= min_weight] - dbg = [] - for el in res: - weight = int(el.weight or 0) - tag = el.item.get_name().lower() - if _is_allowed(tag): - if min_weight > -1 and min_weight > weight and len(tags) > 0: - return tags - tags.append(tag) - dbg.append(u'{0} [{1}]'.format(tag, weight)) - if len(tags) == count: - break - log.debug(u'lastfm.tag (min. {0}): {1}'.format( - min_weight, u', '.join(dbg) - )) - return tags + # Get strings from tags. + res = [el.item.get_name().lower() for el in res] - -def _is_allowed(genre): - """Determine whether the genre is present in the whitelist, - returning a boolean. - """ - if genre is None: - return False - if not options['whitelist'] or genre in options['whitelist']: - return True - return False - - -def _strings_to_genre(tags): - """Given a list of strings, return a genre by joining them into a - single string and (optionally) canonicalizing each. - """ - if not tags: - return None - - if options.get('c14n'): - # Use the canonicalization tree. - out = [] - for tag in tags: - for parent in find_parents(tag, options['branches']): - if _is_allowed(parent): - out.append(parent) - break - tags = out - - tags = [t.title() for t in tags] - return config['lastgenre']['separator'].get(unicode).join( - tags[:config['lastgenre']['count'].get(int)] - ) - - -def fetch_genre(lastfm_obj): - """Return the genre for a pylast entity or None if no suitable genre - can be found. Ex. 'Electronic, House, Dance' - """ - return _strings_to_genre(_tags_for(lastfm_obj)) + return res # Canonicalization tree processing. @@ -161,61 +105,10 @@ def find_parents(candidate, branches): return [candidate] -# Cached entity lookups. - -_genre_cache = {} - - -def _cached_lookup(entity, method, *args): - """Get a genre based on the named entity using the callable `method` - whose arguments are given in the sequence `args`. The genre lookup - is cached based on the entity name and the arguments. - """ - # Shortcut if we're missing metadata. - if any(not s for s in args): - return None - - key = u'{0}.{1}'.format(entity, u'-'.join(unicode(a) for a in args)) - if key in _genre_cache: - return _genre_cache[key] - else: - genre = fetch_genre(method(*args)) - _genre_cache[key] = genre - return genre - - -def fetch_album_genre(obj): - """Return the album genre for this Item or Album. - """ - return _cached_lookup(u'album', LASTFM.get_album, obj.albumartist, - obj.album) - - -def fetch_album_artist_genre(obj): - """Return the album artist genre for this Item or Album. - """ - return _cached_lookup(u'artist', LASTFM.get_artist, obj.albumartist) - - -def fetch_artist_genre(item): - """Returns the track artist genre for this Item. - """ - return _cached_lookup(u'artist', LASTFM.get_artist, item.artist) - - -def fetch_track_genre(obj): - """Returns the track genre for this Item. - """ - return _cached_lookup(u'track', LASTFM.get_track, obj.artist, obj.title) - - # Main plugin logic. -options = { - 'whitelist': None, - 'branches': None, - 'c14n': False, -} +WHITELIST = os.path.join(os.path.dirname(__file__), 'genres.txt') +C14N_TREE = os.path.join(os.path.dirname(__file__), 'genres-tree.yaml') class LastGenrePlugin(plugins.BeetsPlugin): @@ -223,43 +116,49 @@ class LastGenrePlugin(plugins.BeetsPlugin): super(LastGenrePlugin, self).__init__() self.config.add({ - 'whitelist': os.path.join(os.path.dirname(__file__), 'genres.txt'), + 'whitelist': True, 'min_weight': 10, 'count': 1, 'fallback': None, - 'canonical': None, + 'canonical': False, 'source': 'album', 'force': True, 'auto': True, 'separator': u', ', }) + self.setup() + + def setup(self): + """Setup plugin from config options + """ if self.config['auto']: self.import_stages = [self.imported] - # Read the whitelist file. - wl_filename = self.config['whitelist'].as_filename() - whitelist = set() - with open(wl_filename) as f: - for line in f: - line = line.decode('utf8').strip().lower() - if line: - whitelist.add(line) - options['whitelist'] = whitelist + self._genre_cache = {} + + # Read the whitelist file if enabled. + self.whitelist = set() + wl_filename = self.config['whitelist'].get() + if wl_filename in (True, ''): # Indicates the default whitelist. + wl_filename = WHITELIST + if wl_filename: + wl_filename = normpath(wl_filename) + with open(wl_filename, 'r') as f: + for line in f: + line = line.decode('utf8').strip().lower() + if line and not line.startswith(u'#'): + self.whitelist.add(line) # Read the genres tree for canonicalization if enabled. + self.c14n_branches = [] c14n_filename = self.config['canonical'].get() - if c14n_filename is not None: - c14n_filename = c14n_filename.strip() - if not c14n_filename: - c14n_filename = C14N_TREE + if c14n_filename in (True, ''): # Default tree. + c14n_filename = C14N_TREE + if c14n_filename: c14n_filename = normpath(c14n_filename) - genres_tree = yaml.load(open(c14n_filename, 'r')) - branches = [] - flatten_tree(genres_tree, [], branches) - options['branches'] = branches - options['c14n'] = True + flatten_tree(genres_tree, [], self.c14n_branches) @property def sources(self): @@ -274,6 +173,97 @@ class LastGenrePlugin(plugins.BeetsPlugin): elif source == 'artist': return 'artist', + def _resolve_genres(self, tags): + """Given a list of strings, return a genre by joining them into a + single string and (optionally) canonicalizing each. + """ + if not tags: + return None + + count = self.config['count'].get(int) + if self.c14n_branches: + # Extend the list to consider tags parents in the c14n tree + tags_all = [] + for tag in tags: + # Add parents that are in the whitelist, or add the oldest + # ancestor if no whitelist + if self.whitelist: + parents = [x for x in find_parents(tag, self.c14n_branches) + if self._is_allowed(x)] + else: + parents = [find_parents(tag, self.c14n_branches)[-1]] + + tags_all += parents + if len(tags_all) >= count: + break + tags = tags_all + # c14n only adds allowed genres but we may have had forbidden genres in + # the original tags list + tags = [x.title() for x in tags if self._is_allowed(x)] + + return self.config['separator'].get(unicode).join( + tags[:self.config['count'].get(int)] + ) + + def fetch_genre(self, lastfm_obj): + """Return the genre for a pylast entity or None if no suitable genre + can be found. Ex. 'Electronic, House, Dance' + """ + min_weight = self.config['min_weight'].get(int) + return self._resolve_genres(_tags_for(lastfm_obj, min_weight)) + + def _is_allowed(self, genre): + """Determine whether the genre is present in the whitelist, + returning a boolean. + """ + if genre is None: + return False + if not self.whitelist or genre in self.whitelist: + return True + return False + + # Cached entity lookups. + + def _cached_lookup(self, entity, method, *args): + """Get a genre based on the named entity using the callable `method` + whose arguments are given in the sequence `args`. The genre lookup + is cached based on the entity name and the arguments. + """ + # Shortcut if we're missing metadata. + if any(not s for s in args): + return None + + key = u'{0}.{1}'.format(entity, u'-'.join(unicode(a) for a in args)) + if key in self._genre_cache: + return self._genre_cache[key] + else: + genre = self.fetch_genre(method(*args)) + self._genre_cache[key] = genre + return genre + + def fetch_album_genre(self, obj): + """Return the album genre for this Item or Album. + """ + return self._cached_lookup(u'album', LASTFM.get_album, obj.albumartist, + obj.album) + + def fetch_album_artist_genre(self, obj): + """Return the album artist genre for this Item or Album. + """ + return self._cached_lookup(u'artist', LASTFM.get_artist, + obj.albumartist) + + def fetch_artist_genre(self, item): + """Returns the track artist genre for this Item. + """ + return self._cached_lookup(u'artist', LASTFM.get_artist, item.artist) + + def fetch_track_genre(self, obj): + """Returns the track genre for this Item. + """ + return self._cached_lookup(u'track', LASTFM.get_track, obj.artist, + obj.title) + def _get_genre(self, obj): """Get the genre string for an Album or Item object based on self.sources. Return a `(genre, source)` pair. The @@ -285,20 +275,21 @@ class LastGenrePlugin(plugins.BeetsPlugin): - fallback - None """ + # Shortcut to existing genre if not forcing. - if not self.config['force'] and _is_allowed(obj.genre): + if not self.config['force'] and self._is_allowed(obj.genre): return obj.genre, 'keep' # Track genre (for Items only). if isinstance(obj, library.Item): if 'track' in self.sources: - result = fetch_track_genre(obj) + result = self.fetch_track_genre(obj) if result: return result, 'track' # Album genre. if 'album' in self.sources: - result = fetch_album_genre(obj) + result = self.fetch_album_genre(obj) if result: return result, 'album' @@ -306,18 +297,18 @@ class LastGenrePlugin(plugins.BeetsPlugin): if 'artist' in self.sources: result = None if isinstance(obj, library.Item): - result = fetch_artist_genre(obj) + result = self.fetch_artist_genre(obj) elif obj.albumartist != 'Various Artists': - result = fetch_album_artist_genre(obj) + result = self.fetch_album_artist_genre(obj) else: # For "Various Artists", pick the most popular track genre. item_genres = [] for item in obj.items(): item_genre = None if 'track' in self.sources: - item_genre = fetch_track_genre(item) + item_genre = self.fetch_track_genre(item) if not item_genre: - item_genre = fetch_artist_genre(item) + item_genre = self.fetch_artist_genre(item) if item_genre: item_genres.append(item_genre) if item_genres: @@ -328,7 +319,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): # Filter the existing genre. if obj.genre: - result = _strings_to_genre([obj.genre]) + result = self._resolve_genres([obj.genre]) if result: return result, 'original' diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index 7a4d19a51..58f3432be 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -27,6 +27,8 @@ internal whitelist, but you can set your own in the config file using the lastgenre: whitelist: /path/to/genres.txt +…or go for no whitelist altogether by setting the option to `false`. + The genre list file should contain one genre per line. Blank lines are ignored. For the curious, the default genre list is generated by a `script that scrapes Wikipedia`_. @@ -37,7 +39,7 @@ Wikipedia`_. By default, beets will always fetch new genres, even if the files already have once. To instead leave genres in place in when they pass the whitelist, set -the ``force`` option to "no". +the ``force`` option to `no`. If no genre is found, the file will be left unchanged. To instead specify a fallback genre, use the ``fallback`` configuration option. You can, of @@ -58,9 +60,9 @@ leaves of the tree represent the most specific genres. To enable canonicalization, set the ``canonical`` configuration value:: lastgenre: - canonical: '' + canonical: true -Setting this value to the empty string will use a built-in canonicalization +Setting this value to `true` will use a built-in canonicalization tree. You can also set it to a path, just like the ``whitelist`` config value, to use your own tree. @@ -108,9 +110,6 @@ the ``min_weight`` config option:: lastgenre: min_weight: 15 -However, if no tag with a weight greater then ``min_weight`` is found, the -plugin uses the next-most-popular tag. - Running Manually ---------------- diff --git a/test/test_lastgenre.py b/test/test_lastgenre.py new file mode 100644 index 000000000..8fa5dc217 --- /dev/null +++ b/test/test_lastgenre.py @@ -0,0 +1,137 @@ +# This file is part of beets. +# Copyright 2014, Fabrice Laporte. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Tests for the 'lastgenre' plugin.""" + +from _common import unittest +from beetsplug import lastgenre +from beets import config + +from helper import TestHelper + + +class LastGenrePluginTest(unittest.TestCase, TestHelper): + def setUp(self): + self.setup_beets() + self.plugin = lastgenre.LastGenrePlugin() + + def tearDown(self): + self.teardown_beets() + + def _setup_config(self, whitelist=False, canonical=False, count=1): + config['lastgenre']['canonical'] = canonical + config['lastgenre']['count'] = count + if isinstance(whitelist, (bool, basestring)): + # Filename, default, or disabled. + config['lastgenre']['whitelist'] = whitelist + self.plugin.setup() + if not isinstance(whitelist, (bool, basestring)): + # Explicit list of genres. + self.plugin.whitelist = whitelist + + def test_default(self): + """Fetch genres with whitelist and c14n deactivated + """ + self._setup_config() + self.assertEqual(self.plugin._resolve_genres(['delta blues']), + 'Delta Blues') + + def test_c14n_only(self): + """Default c14n tree funnels up to most common genre except for *wrong* + genres that stay unchanged. + """ + self._setup_config(canonical=True, count=99) + self.assertEqual(self.plugin._resolve_genres(['delta blues']), + 'Blues') + self.assertEqual(self.plugin._resolve_genres(['iota blues']), + 'Iota Blues') + + def test_whitelist_only(self): + """Default whitelist rejects *wrong* (non existing) genres. + """ + self._setup_config(whitelist=True) + self.assertEqual(self.plugin._resolve_genres(['iota blues']), + '') + + def test_whitelist_c14n(self): + """Default whitelist and c14n both activated result in all parents + genres being selected (from specific to common). + """ + self._setup_config(canonical=True, whitelist=True, count=99) + self.assertEqual(self.plugin._resolve_genres(['delta blues']), + 'Delta Blues, Country Blues, Blues') + + def test_whitelist_custom(self): + """Keep only genres that are in the whitelist. + """ + self._setup_config(whitelist=set(['blues', 'rock', 'jazz']), + count=2) + self.assertEqual(self.plugin._resolve_genres(['pop', 'blues']), + 'Blues') + + self._setup_config(canonical='', whitelist=set(['rock'])) + self.assertEqual(self.plugin._resolve_genres(['delta blues']), + '') + + def test_count(self): + """Keep the n first genres, as we expect them to be sorted from more to + less popular. + """ + self._setup_config(whitelist=set(['blues', 'rock', 'jazz']), + count=2) + self.assertEqual(self.plugin._resolve_genres( + ['jazz', 'pop', 'rock', 'blues']), + 'Jazz, Rock') + + def test_count_c14n(self): + """Keep the n first genres, after having applied c14n when necessary + """ + self._setup_config(whitelist=set(['blues', 'rock', 'jazz']), + canonical=True, + count=2) + # thanks to c14n, 'blues' superseeds 'country blues' and takes the + # second slot + self.assertEqual(self.plugin._resolve_genres( + ['jazz', 'pop', 'country blues', 'rock']), + 'Jazz, Blues') + + def test_c14n_whitelist(self): + """Genres first pass through c14n and are then filtered + """ + self._setup_config(canonical=True, whitelist=set(['rock'])) + self.assertEqual(self.plugin._resolve_genres(['delta blues']), + '') + + def test_empty_string_enables_canonical(self): + """For backwards compatibility, setting the `canonical` option + to the empty string enables it using the default tree. + """ + self._setup_config(canonical='', count=99) + self.assertEqual(self.plugin._resolve_genres(['delta blues']), + 'Blues') + + def test_empty_string_enables_whitelist(self): + """Again for backwards compatibility, setting the `whitelist` + option to the empty string enables the default set of genres. + """ + self._setup_config(whitelist='') + self.assertEqual(self.plugin._resolve_genres(['iota blues']), + '') + + +def suite(): + return unittest.TestLoader().loadTestsFromName(__name__) + +if __name__ == '__main__': + unittest.main(defaultTest='suite')