mirror of
https://github.com/beetbox/beets.git
synced 2025-12-10 02:22:25 +01:00
Merge pull request #728 from sampsyo/lastgenre-tests-new
Tests for lastgenre (cleaned up)
This commit is contained in:
commit
362db8f22f
3 changed files with 281 additions and 154 deletions
|
|
@ -15,11 +15,6 @@
|
|||
"""Gets genres for imported music based on Last.fm tags.
|
||||
|
||||
Uses a provided whitelist file to determine which tags are valid genres.
|
||||
The genre whitelist can be specified like so in .beetsconfig:
|
||||
|
||||
[lastgenre]
|
||||
whitelist=/path/to/genres.txt
|
||||
|
||||
The included (default) genre list was produced by scraping Wikipedia.
|
||||
The scraper script used is available here:
|
||||
https://gist.github.com/1241307
|
||||
|
|
@ -38,7 +33,6 @@ from beets import library
|
|||
log = logging.getLogger('beets')
|
||||
|
||||
LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY)
|
||||
C14N_TREE = os.path.join(os.path.dirname(__file__), 'genres-tree.yaml')
|
||||
|
||||
PYLAST_EXCEPTIONS = (
|
||||
pylast.WSError,
|
||||
|
|
@ -49,10 +43,12 @@ PYLAST_EXCEPTIONS = (
|
|||
|
||||
# Core genre identification routine.
|
||||
|
||||
def _tags_for(obj):
|
||||
"""Given a pylast entity (album or track), returns a list of
|
||||
tag names for that entity. Returns an empty list if the entity is
|
||||
def _tags_for(obj, min_weight=None):
|
||||
"""Given a pylast entity (album or track), return a list of
|
||||
tag names for that entity. Return an empty list if the entity is
|
||||
not found or another error occurs.
|
||||
|
||||
If `min_weight` is specified, tags are filtered by weight.
|
||||
"""
|
||||
try:
|
||||
# Work around an inconsistency in pylast where
|
||||
|
|
@ -66,66 +62,14 @@ def _tags_for(obj):
|
|||
log.debug(u'last.fm error: %s' % unicode(exc))
|
||||
return []
|
||||
|
||||
tags = []
|
||||
min_weight = config['lastgenre']['min_weight'].get(int)
|
||||
count = config['lastgenre']['count'].get(int)
|
||||
# Filter by weight (optionally).
|
||||
if min_weight:
|
||||
res = [el for el in res if (el.weight or 0) >= min_weight]
|
||||
|
||||
dbg = []
|
||||
for el in res:
|
||||
weight = int(el.weight or 0)
|
||||
tag = el.item.get_name().lower()
|
||||
if _is_allowed(tag):
|
||||
if min_weight > -1 and min_weight > weight and len(tags) > 0:
|
||||
return tags
|
||||
tags.append(tag)
|
||||
dbg.append(u'{0} [{1}]'.format(tag, weight))
|
||||
if len(tags) == count:
|
||||
break
|
||||
log.debug(u'lastfm.tag (min. {0}): {1}'.format(
|
||||
min_weight, u', '.join(dbg)
|
||||
))
|
||||
return tags
|
||||
# Get strings from tags.
|
||||
res = [el.item.get_name().lower() for el in res]
|
||||
|
||||
|
||||
def _is_allowed(genre):
|
||||
"""Determine whether the genre is present in the whitelist,
|
||||
returning a boolean.
|
||||
"""
|
||||
if genre is None:
|
||||
return False
|
||||
if not options['whitelist'] or genre in options['whitelist']:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _strings_to_genre(tags):
|
||||
"""Given a list of strings, return a genre by joining them into a
|
||||
single string and (optionally) canonicalizing each.
|
||||
"""
|
||||
if not tags:
|
||||
return None
|
||||
|
||||
if options.get('c14n'):
|
||||
# Use the canonicalization tree.
|
||||
out = []
|
||||
for tag in tags:
|
||||
for parent in find_parents(tag, options['branches']):
|
||||
if _is_allowed(parent):
|
||||
out.append(parent)
|
||||
break
|
||||
tags = out
|
||||
|
||||
tags = [t.title() for t in tags]
|
||||
return config['lastgenre']['separator'].get(unicode).join(
|
||||
tags[:config['lastgenre']['count'].get(int)]
|
||||
)
|
||||
|
||||
|
||||
def fetch_genre(lastfm_obj):
|
||||
"""Return the genre for a pylast entity or None if no suitable genre
|
||||
can be found. Ex. 'Electronic, House, Dance'
|
||||
"""
|
||||
return _strings_to_genre(_tags_for(lastfm_obj))
|
||||
return res
|
||||
|
||||
|
||||
# Canonicalization tree processing.
|
||||
|
|
@ -161,61 +105,10 @@ def find_parents(candidate, branches):
|
|||
return [candidate]
|
||||
|
||||
|
||||
# Cached entity lookups.
|
||||
|
||||
_genre_cache = {}
|
||||
|
||||
|
||||
def _cached_lookup(entity, method, *args):
|
||||
"""Get a genre based on the named entity using the callable `method`
|
||||
whose arguments are given in the sequence `args`. The genre lookup
|
||||
is cached based on the entity name and the arguments.
|
||||
"""
|
||||
# Shortcut if we're missing metadata.
|
||||
if any(not s for s in args):
|
||||
return None
|
||||
|
||||
key = u'{0}.{1}'.format(entity, u'-'.join(unicode(a) for a in args))
|
||||
if key in _genre_cache:
|
||||
return _genre_cache[key]
|
||||
else:
|
||||
genre = fetch_genre(method(*args))
|
||||
_genre_cache[key] = genre
|
||||
return genre
|
||||
|
||||
|
||||
def fetch_album_genre(obj):
|
||||
"""Return the album genre for this Item or Album.
|
||||
"""
|
||||
return _cached_lookup(u'album', LASTFM.get_album, obj.albumartist,
|
||||
obj.album)
|
||||
|
||||
|
||||
def fetch_album_artist_genre(obj):
|
||||
"""Return the album artist genre for this Item or Album.
|
||||
"""
|
||||
return _cached_lookup(u'artist', LASTFM.get_artist, obj.albumartist)
|
||||
|
||||
|
||||
def fetch_artist_genre(item):
|
||||
"""Returns the track artist genre for this Item.
|
||||
"""
|
||||
return _cached_lookup(u'artist', LASTFM.get_artist, item.artist)
|
||||
|
||||
|
||||
def fetch_track_genre(obj):
|
||||
"""Returns the track genre for this Item.
|
||||
"""
|
||||
return _cached_lookup(u'track', LASTFM.get_track, obj.artist, obj.title)
|
||||
|
||||
|
||||
# Main plugin logic.
|
||||
|
||||
options = {
|
||||
'whitelist': None,
|
||||
'branches': None,
|
||||
'c14n': False,
|
||||
}
|
||||
WHITELIST = os.path.join(os.path.dirname(__file__), 'genres.txt')
|
||||
C14N_TREE = os.path.join(os.path.dirname(__file__), 'genres-tree.yaml')
|
||||
|
||||
|
||||
class LastGenrePlugin(plugins.BeetsPlugin):
|
||||
|
|
@ -223,43 +116,49 @@ class LastGenrePlugin(plugins.BeetsPlugin):
|
|||
super(LastGenrePlugin, self).__init__()
|
||||
|
||||
self.config.add({
|
||||
'whitelist': os.path.join(os.path.dirname(__file__), 'genres.txt'),
|
||||
'whitelist': True,
|
||||
'min_weight': 10,
|
||||
'count': 1,
|
||||
'fallback': None,
|
||||
'canonical': None,
|
||||
'canonical': False,
|
||||
'source': 'album',
|
||||
'force': True,
|
||||
'auto': True,
|
||||
'separator': u', ',
|
||||
})
|
||||
|
||||
self.setup()
|
||||
|
||||
def setup(self):
|
||||
"""Setup plugin from config options
|
||||
"""
|
||||
if self.config['auto']:
|
||||
self.import_stages = [self.imported]
|
||||
|
||||
# Read the whitelist file.
|
||||
wl_filename = self.config['whitelist'].as_filename()
|
||||
whitelist = set()
|
||||
with open(wl_filename) as f:
|
||||
for line in f:
|
||||
line = line.decode('utf8').strip().lower()
|
||||
if line:
|
||||
whitelist.add(line)
|
||||
options['whitelist'] = whitelist
|
||||
self._genre_cache = {}
|
||||
|
||||
# Read the whitelist file if enabled.
|
||||
self.whitelist = set()
|
||||
wl_filename = self.config['whitelist'].get()
|
||||
if wl_filename in (True, ''): # Indicates the default whitelist.
|
||||
wl_filename = WHITELIST
|
||||
if wl_filename:
|
||||
wl_filename = normpath(wl_filename)
|
||||
with open(wl_filename, 'r') as f:
|
||||
for line in f:
|
||||
line = line.decode('utf8').strip().lower()
|
||||
if line and not line.startswith(u'#'):
|
||||
self.whitelist.add(line)
|
||||
|
||||
# Read the genres tree for canonicalization if enabled.
|
||||
self.c14n_branches = []
|
||||
c14n_filename = self.config['canonical'].get()
|
||||
if c14n_filename is not None:
|
||||
c14n_filename = c14n_filename.strip()
|
||||
if not c14n_filename:
|
||||
c14n_filename = C14N_TREE
|
||||
if c14n_filename in (True, ''): # Default tree.
|
||||
c14n_filename = C14N_TREE
|
||||
if c14n_filename:
|
||||
c14n_filename = normpath(c14n_filename)
|
||||
|
||||
genres_tree = yaml.load(open(c14n_filename, 'r'))
|
||||
branches = []
|
||||
flatten_tree(genres_tree, [], branches)
|
||||
options['branches'] = branches
|
||||
options['c14n'] = True
|
||||
flatten_tree(genres_tree, [], self.c14n_branches)
|
||||
|
||||
@property
|
||||
def sources(self):
|
||||
|
|
@ -274,6 +173,97 @@ class LastGenrePlugin(plugins.BeetsPlugin):
|
|||
elif source == 'artist':
|
||||
return 'artist',
|
||||
|
||||
def _resolve_genres(self, tags):
|
||||
"""Given a list of strings, return a genre by joining them into a
|
||||
single string and (optionally) canonicalizing each.
|
||||
"""
|
||||
if not tags:
|
||||
return None
|
||||
|
||||
count = self.config['count'].get(int)
|
||||
if self.c14n_branches:
|
||||
# Extend the list to consider tags parents in the c14n tree
|
||||
tags_all = []
|
||||
for tag in tags:
|
||||
# Add parents that are in the whitelist, or add the oldest
|
||||
# ancestor if no whitelist
|
||||
if self.whitelist:
|
||||
parents = [x for x in find_parents(tag, self.c14n_branches)
|
||||
if self._is_allowed(x)]
|
||||
else:
|
||||
parents = [find_parents(tag, self.c14n_branches)[-1]]
|
||||
|
||||
tags_all += parents
|
||||
if len(tags_all) >= count:
|
||||
break
|
||||
tags = tags_all
|
||||
# c14n only adds allowed genres but we may have had forbidden genres in
|
||||
# the original tags list
|
||||
tags = [x.title() for x in tags if self._is_allowed(x)]
|
||||
|
||||
return self.config['separator'].get(unicode).join(
|
||||
tags[:self.config['count'].get(int)]
|
||||
)
|
||||
|
||||
def fetch_genre(self, lastfm_obj):
|
||||
"""Return the genre for a pylast entity or None if no suitable genre
|
||||
can be found. Ex. 'Electronic, House, Dance'
|
||||
"""
|
||||
min_weight = self.config['min_weight'].get(int)
|
||||
return self._resolve_genres(_tags_for(lastfm_obj, min_weight))
|
||||
|
||||
def _is_allowed(self, genre):
|
||||
"""Determine whether the genre is present in the whitelist,
|
||||
returning a boolean.
|
||||
"""
|
||||
if genre is None:
|
||||
return False
|
||||
if not self.whitelist or genre in self.whitelist:
|
||||
return True
|
||||
return False
|
||||
|
||||
# Cached entity lookups.
|
||||
|
||||
def _cached_lookup(self, entity, method, *args):
|
||||
"""Get a genre based on the named entity using the callable `method`
|
||||
whose arguments are given in the sequence `args`. The genre lookup
|
||||
is cached based on the entity name and the arguments.
|
||||
"""
|
||||
# Shortcut if we're missing metadata.
|
||||
if any(not s for s in args):
|
||||
return None
|
||||
|
||||
key = u'{0}.{1}'.format(entity, u'-'.join(unicode(a) for a in args))
|
||||
if key in self._genre_cache:
|
||||
return self._genre_cache[key]
|
||||
else:
|
||||
genre = self.fetch_genre(method(*args))
|
||||
self._genre_cache[key] = genre
|
||||
return genre
|
||||
|
||||
def fetch_album_genre(self, obj):
|
||||
"""Return the album genre for this Item or Album.
|
||||
"""
|
||||
return self._cached_lookup(u'album', LASTFM.get_album, obj.albumartist,
|
||||
obj.album)
|
||||
|
||||
def fetch_album_artist_genre(self, obj):
|
||||
"""Return the album artist genre for this Item or Album.
|
||||
"""
|
||||
return self._cached_lookup(u'artist', LASTFM.get_artist,
|
||||
obj.albumartist)
|
||||
|
||||
def fetch_artist_genre(self, item):
|
||||
"""Returns the track artist genre for this Item.
|
||||
"""
|
||||
return self._cached_lookup(u'artist', LASTFM.get_artist, item.artist)
|
||||
|
||||
def fetch_track_genre(self, obj):
|
||||
"""Returns the track genre for this Item.
|
||||
"""
|
||||
return self._cached_lookup(u'track', LASTFM.get_track, obj.artist,
|
||||
obj.title)
|
||||
|
||||
def _get_genre(self, obj):
|
||||
"""Get the genre string for an Album or Item object based on
|
||||
self.sources. Return a `(genre, source)` pair. The
|
||||
|
|
@ -285,20 +275,21 @@ class LastGenrePlugin(plugins.BeetsPlugin):
|
|||
- fallback
|
||||
- None
|
||||
"""
|
||||
|
||||
# Shortcut to existing genre if not forcing.
|
||||
if not self.config['force'] and _is_allowed(obj.genre):
|
||||
if not self.config['force'] and self._is_allowed(obj.genre):
|
||||
return obj.genre, 'keep'
|
||||
|
||||
# Track genre (for Items only).
|
||||
if isinstance(obj, library.Item):
|
||||
if 'track' in self.sources:
|
||||
result = fetch_track_genre(obj)
|
||||
result = self.fetch_track_genre(obj)
|
||||
if result:
|
||||
return result, 'track'
|
||||
|
||||
# Album genre.
|
||||
if 'album' in self.sources:
|
||||
result = fetch_album_genre(obj)
|
||||
result = self.fetch_album_genre(obj)
|
||||
if result:
|
||||
return result, 'album'
|
||||
|
||||
|
|
@ -306,18 +297,18 @@ class LastGenrePlugin(plugins.BeetsPlugin):
|
|||
if 'artist' in self.sources:
|
||||
result = None
|
||||
if isinstance(obj, library.Item):
|
||||
result = fetch_artist_genre(obj)
|
||||
result = self.fetch_artist_genre(obj)
|
||||
elif obj.albumartist != 'Various Artists':
|
||||
result = fetch_album_artist_genre(obj)
|
||||
result = self.fetch_album_artist_genre(obj)
|
||||
else:
|
||||
# For "Various Artists", pick the most popular track genre.
|
||||
item_genres = []
|
||||
for item in obj.items():
|
||||
item_genre = None
|
||||
if 'track' in self.sources:
|
||||
item_genre = fetch_track_genre(item)
|
||||
item_genre = self.fetch_track_genre(item)
|
||||
if not item_genre:
|
||||
item_genre = fetch_artist_genre(item)
|
||||
item_genre = self.fetch_artist_genre(item)
|
||||
if item_genre:
|
||||
item_genres.append(item_genre)
|
||||
if item_genres:
|
||||
|
|
@ -328,7 +319,7 @@ class LastGenrePlugin(plugins.BeetsPlugin):
|
|||
|
||||
# Filter the existing genre.
|
||||
if obj.genre:
|
||||
result = _strings_to_genre([obj.genre])
|
||||
result = self._resolve_genres([obj.genre])
|
||||
if result:
|
||||
return result, 'original'
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,8 @@ internal whitelist, but you can set your own in the config file using the
|
|||
lastgenre:
|
||||
whitelist: /path/to/genres.txt
|
||||
|
||||
…or go for no whitelist altogether by setting the option to `false`.
|
||||
|
||||
The genre list file should contain one genre per line. Blank lines are ignored.
|
||||
For the curious, the default genre list is generated by a `script that scrapes
|
||||
Wikipedia`_.
|
||||
|
|
@ -37,7 +39,7 @@ Wikipedia`_.
|
|||
|
||||
By default, beets will always fetch new genres, even if the files already have
|
||||
once. To instead leave genres in place in when they pass the whitelist, set
|
||||
the ``force`` option to "no".
|
||||
the ``force`` option to `no`.
|
||||
|
||||
If no genre is found, the file will be left unchanged. To instead specify a
|
||||
fallback genre, use the ``fallback`` configuration option. You can, of
|
||||
|
|
@ -58,9 +60,9 @@ leaves of the tree represent the most specific genres.
|
|||
To enable canonicalization, set the ``canonical`` configuration value::
|
||||
|
||||
lastgenre:
|
||||
canonical: ''
|
||||
canonical: true
|
||||
|
||||
Setting this value to the empty string will use a built-in canonicalization
|
||||
Setting this value to `true` will use a built-in canonicalization
|
||||
tree. You can also set it to a path, just like the ``whitelist`` config value,
|
||||
to use your own tree.
|
||||
|
||||
|
|
@ -108,9 +110,6 @@ the ``min_weight`` config option::
|
|||
lastgenre:
|
||||
min_weight: 15
|
||||
|
||||
However, if no tag with a weight greater then ``min_weight`` is found, the
|
||||
plugin uses the next-most-popular tag.
|
||||
|
||||
|
||||
Running Manually
|
||||
----------------
|
||||
|
|
|
|||
137
test/test_lastgenre.py
Normal file
137
test/test_lastgenre.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
# This file is part of beets.
|
||||
# Copyright 2014, Fabrice Laporte.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
# "Software"), to deal in the Software without restriction, including
|
||||
# without limitation the rights to use, copy, modify, merge, publish,
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so, subject to
|
||||
# the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
|
||||
"""Tests for the 'lastgenre' plugin."""
|
||||
|
||||
from _common import unittest
|
||||
from beetsplug import lastgenre
|
||||
from beets import config
|
||||
|
||||
from helper import TestHelper
|
||||
|
||||
|
||||
class LastGenrePluginTest(unittest.TestCase, TestHelper):
|
||||
def setUp(self):
|
||||
self.setup_beets()
|
||||
self.plugin = lastgenre.LastGenrePlugin()
|
||||
|
||||
def tearDown(self):
|
||||
self.teardown_beets()
|
||||
|
||||
def _setup_config(self, whitelist=False, canonical=False, count=1):
|
||||
config['lastgenre']['canonical'] = canonical
|
||||
config['lastgenre']['count'] = count
|
||||
if isinstance(whitelist, (bool, basestring)):
|
||||
# Filename, default, or disabled.
|
||||
config['lastgenre']['whitelist'] = whitelist
|
||||
self.plugin.setup()
|
||||
if not isinstance(whitelist, (bool, basestring)):
|
||||
# Explicit list of genres.
|
||||
self.plugin.whitelist = whitelist
|
||||
|
||||
def test_default(self):
|
||||
"""Fetch genres with whitelist and c14n deactivated
|
||||
"""
|
||||
self._setup_config()
|
||||
self.assertEqual(self.plugin._resolve_genres(['delta blues']),
|
||||
'Delta Blues')
|
||||
|
||||
def test_c14n_only(self):
|
||||
"""Default c14n tree funnels up to most common genre except for *wrong*
|
||||
genres that stay unchanged.
|
||||
"""
|
||||
self._setup_config(canonical=True, count=99)
|
||||
self.assertEqual(self.plugin._resolve_genres(['delta blues']),
|
||||
'Blues')
|
||||
self.assertEqual(self.plugin._resolve_genres(['iota blues']),
|
||||
'Iota Blues')
|
||||
|
||||
def test_whitelist_only(self):
|
||||
"""Default whitelist rejects *wrong* (non existing) genres.
|
||||
"""
|
||||
self._setup_config(whitelist=True)
|
||||
self.assertEqual(self.plugin._resolve_genres(['iota blues']),
|
||||
'')
|
||||
|
||||
def test_whitelist_c14n(self):
|
||||
"""Default whitelist and c14n both activated result in all parents
|
||||
genres being selected (from specific to common).
|
||||
"""
|
||||
self._setup_config(canonical=True, whitelist=True, count=99)
|
||||
self.assertEqual(self.plugin._resolve_genres(['delta blues']),
|
||||
'Delta Blues, Country Blues, Blues')
|
||||
|
||||
def test_whitelist_custom(self):
|
||||
"""Keep only genres that are in the whitelist.
|
||||
"""
|
||||
self._setup_config(whitelist=set(['blues', 'rock', 'jazz']),
|
||||
count=2)
|
||||
self.assertEqual(self.plugin._resolve_genres(['pop', 'blues']),
|
||||
'Blues')
|
||||
|
||||
self._setup_config(canonical='', whitelist=set(['rock']))
|
||||
self.assertEqual(self.plugin._resolve_genres(['delta blues']),
|
||||
'')
|
||||
|
||||
def test_count(self):
|
||||
"""Keep the n first genres, as we expect them to be sorted from more to
|
||||
less popular.
|
||||
"""
|
||||
self._setup_config(whitelist=set(['blues', 'rock', 'jazz']),
|
||||
count=2)
|
||||
self.assertEqual(self.plugin._resolve_genres(
|
||||
['jazz', 'pop', 'rock', 'blues']),
|
||||
'Jazz, Rock')
|
||||
|
||||
def test_count_c14n(self):
|
||||
"""Keep the n first genres, after having applied c14n when necessary
|
||||
"""
|
||||
self._setup_config(whitelist=set(['blues', 'rock', 'jazz']),
|
||||
canonical=True,
|
||||
count=2)
|
||||
# thanks to c14n, 'blues' superseeds 'country blues' and takes the
|
||||
# second slot
|
||||
self.assertEqual(self.plugin._resolve_genres(
|
||||
['jazz', 'pop', 'country blues', 'rock']),
|
||||
'Jazz, Blues')
|
||||
|
||||
def test_c14n_whitelist(self):
|
||||
"""Genres first pass through c14n and are then filtered
|
||||
"""
|
||||
self._setup_config(canonical=True, whitelist=set(['rock']))
|
||||
self.assertEqual(self.plugin._resolve_genres(['delta blues']),
|
||||
'')
|
||||
|
||||
def test_empty_string_enables_canonical(self):
|
||||
"""For backwards compatibility, setting the `canonical` option
|
||||
to the empty string enables it using the default tree.
|
||||
"""
|
||||
self._setup_config(canonical='', count=99)
|
||||
self.assertEqual(self.plugin._resolve_genres(['delta blues']),
|
||||
'Blues')
|
||||
|
||||
def test_empty_string_enables_whitelist(self):
|
||||
"""Again for backwards compatibility, setting the `whitelist`
|
||||
option to the empty string enables the default set of genres.
|
||||
"""
|
||||
self._setup_config(whitelist='')
|
||||
self.assertEqual(self.plugin._resolve_genres(['iota blues']),
|
||||
'')
|
||||
|
||||
|
||||
def suite():
|
||||
return unittest.TestLoader().loadTestsFromName(__name__)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(defaultTest='suite')
|
||||
Loading…
Reference in a new issue