diff --git a/NEWS b/NEWS index c1703cd70..be7750c8a 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,12 @@ under the specified directory. * Album art is now automatically discovered and copied from the imported directories when available. +* When choosing the "as-is" import album (or doing a non-autotagged + import), every album either has an "album artist" set or is + marked as a compilation (Various Artists). The choice is made + based on the homogeneity of the tracks' artists. This prevents + compilations that are imported as-is from being scattered across + many directories after they are imported. * The release label for albums and tracks is now fetched from MusicBrainz, written to files, and stored in the database. * Release year and label are now shown in the candidate selection diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py index 0317b8d4c..f2c347510 100644 --- a/beets/autotag/__init__.py +++ b/beets/autotag/__init__.py @@ -16,14 +16,13 @@ """ import os import logging -from collections import defaultdict import re from munkres import Munkres from unidecode import unidecode from beets.autotag import mb from beets import library, mediafile, plugins -from beets.util import levenshtein, sorted_walk +from beets.util import levenshtein, sorted_walk, plurality # Try 5 releases. In the future, this should be more dynamic: let the # probability of continuing to the next release be inversely @@ -180,25 +179,6 @@ def string_dist(str1, str2): return dist -def _plurality(objs): - """Given a sequence of comparable objects, returns the object that - is most common in the set and if it is the only object is the set. - """ - # Calculate frequencies. - freqs = defaultdict(int) - for obj in objs: - freqs[obj] += 1 - - # Find object with maximum frequency. - max_freq = 0 - res = None - for obj, freq in freqs.items(): - if freq > max_freq: - max_freq = freq - res = obj - - return res, len(freqs) <= 1 - def current_metadata(items): """Returns the most likely artist and album for a set of Items. Each is determined by tag reflected by the plurality of the Items. @@ -208,7 +188,8 @@ def current_metadata(items): consensus = {} for key in keys: values = [getattr(item, key) for item in items] - likelies[key], consensus[key] = _plurality(values) + likelies[key], freq = plurality(values) + consensus[key] = (freq == len(values)) return likelies['artist'], likelies['album'], consensus['artist'] def order_items(items, trackinfo): diff --git a/beets/importer.py b/beets/importer.py index 1840d984e..9a06a5a13 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -25,7 +25,7 @@ from beets import library import beets.autotag.art from beets import plugins from beets.util import pipeline -from beets.util import syspath, normpath +from beets.util import syspath, normpath, plurality from beets.util.enumeration import enum action = enum( @@ -35,6 +35,8 @@ action = enum( QUEUE_SIZE = 128 STATE_FILE = os.path.expanduser('~/.beetsstate') +SINGLE_ARTIST_THRESH = 0.25 +VARIOUS_ARTISTS = u'Various Artists' # Global logger. log = logging.getLogger('beets') @@ -120,6 +122,37 @@ def _item_duplicate_check(lib, artist, title, recent=None): return True +def _infer_album_fields(album, task): + """Given an album and an associated import task, massage the + album-level metadata. This ensures that the album artist is set + and that the "compilation" flag is set automatically. + """ + assert task.is_album + + if task.choice_flag == action.ASIS: + # Taking metadata "as-is". Guess whether this album is VA. + plur_artist, freq = plurality([i.artist for i in task.items]) + if freq > 1 and float(freq) / len(task.items) >= SINGLE_ARTIST_THRESH: + # Single-artist album. + album.albumartist = plur_artist + album.comp = False + else: + # VA. + album.albumartist = VARIOUS_ARTISTS + album.comp = True + + elif task.choice_flag == action.APPLY: + # Applying autotagged metadata. Just get AA from the first + # item. + if not album.albumartist: + album.albumartist = task.items[0].artist + if not album.mb_albumartistid: + album.mb_albumartistid = task.items[0].mb_artistid + + else: + assert False + + # Utilities for reading and writing the beets progress file, which # allows long tagging tasks to be resumed when they pause (or crash). PROGRESS_KEY = 'tagprogress' @@ -289,19 +322,6 @@ class ImportTask(object): def should_fetch_art(self): """Should album art be downloaded for this album?""" return self.should_write_tags() and self.is_album - def should_infer_aa(self): - """When creating an album structure, should the album artist - field be inferred from the plurality of track artists? - """ - assert self.is_album - if self.choice_flag == action.APPLY: - # Album artist comes from the info dictionary. - return False - elif self.choice_flag == action.ASIS: - # As-is imports likely don't have an album artist. - return True - else: - assert False def should_skip(self): """After a choice has been made, returns True if this is a sentinel or it has been marked for skipping. @@ -462,12 +482,14 @@ def apply_choices(config): if task.should_skip(): continue - # Change metadata, move, and copy. + # Change metadata. if task.should_write_tags(): if task.is_album: autotag.apply_metadata(task.items, task.info) else: autotag.apply_item_metadata(task.item, task.info) + + # Move/copy files. items = task.items if task.is_album else [task.item] if config.copy and config.delete: task.old_paths = [os.path.realpath(syspath(item.path)) @@ -483,8 +505,8 @@ def apply_choices(config): try: if task.is_album: # Add an album. - album = lib.add_album(task.items, - infer_aa = task.should_infer_aa()) + album = lib.add_album(task.items) + _infer_album_fields(album, task) task.album_id = album.id else: # Add tracks. diff --git a/beets/library.py b/beets/library.py index 5777be6b9..a3b408236 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1024,25 +1024,15 @@ class Library(BaseLibrary): if record: return Album(self, dict(record)) - def add_album(self, items, infer_aa=False): + def add_album(self, items): """Create a new album in the database with metadata derived from its items. The items are added to the database if they - don't yet have an ID. Returns an Album object. If the - infer_aa flag is set, then the album artist field will be - guessed from artist fields when not present. + don't yet have an ID. Returns an Album object. """ # Set the metadata from the first item. #fixme: check for consensus? item_values = dict( (key, getattr(items[0], key)) for key in ALBUM_KEYS_ITEM) - if infer_aa: - namemap = { - 'albumartist': 'artist', - 'mb_albumartistid': 'mb_artistid', - } - for field, itemfield in namemap.iteritems(): - if not item_values[field]: - item_values[field] = getattr(items[0], itemfield) sql = 'INSERT INTO albums (%s) VALUES (%s)' % \ (', '.join(ALBUM_KEYS_ITEM), diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 45f945226..041981106 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -16,6 +16,7 @@ import os import sys import re +from collections import defaultdict MAX_FILENAME_LENGTH = 200 @@ -253,3 +254,22 @@ def levenshtein(s1, s2): previous_row = current_row return previous_row[-1] + +def plurality(objs): + """Given a sequence of comparable objects, returns the object that + is most common in the set and the frequency of that object. + """ + # Calculate frequencies. + freqs = defaultdict(int) + for obj in objs: + freqs[obj] += 1 + + # Find object with maximum frequency. + max_freq = 0 + res = None + for obj, freq in freqs.items(): + if freq > max_freq: + max_freq = freq + res = obj + + return res, max_freq diff --git a/beetsplug/lastid.py b/beetsplug/lastid.py index e8b95a182..667d2e5ab 100644 --- a/beetsplug/lastid.py +++ b/beetsplug/lastid.py @@ -20,6 +20,7 @@ from __future__ import with_statement from beets.plugins import BeetsPlugin from beets import autotag from beets.autotag import mb +from beets.util import plurality import lastfp import logging @@ -70,8 +71,8 @@ def get_cur_artist(items): artist_ids.append(last_data['artist_mbid']) # Vote on the most popular artist. - artist, _ = autotag._plurality(artists) - artist_id, _ = autotag._plurality(artist_ids) + artist, _ = plurality(artists) + artist_id, _ = plurality(artist_ids) return artist, artist_id diff --git a/test/test_autotag.py b/test/test_autotag.py index e15d1f385..bbeb9647a 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -22,25 +22,26 @@ import re import _common from beets import autotag from beets.library import Item +from beets.util import plurality class PluralityTest(unittest.TestCase): def test_plurality_consensus(self): objs = [1, 1, 1, 1] - obj, consensus = autotag._plurality(objs) + obj, freq = plurality(objs) self.assertEqual(obj, 1) - self.assertTrue(consensus) + self.assertEqual(freq, 4) def test_plurality_near_consensus(self): objs = [1, 1, 2, 1] - obj, consensus = autotag._plurality(objs) + obj, freq = plurality(objs) self.assertEqual(obj, 1) - self.assertFalse(consensus) + self.assertEqual(freq, 3) def test_plurality_conflict(self): objs = [1, 1, 2, 2, 3] - obj, consensus = autotag._plurality(objs) + obj, freq = plurality(objs) self.assert_(obj in (1, 2)) - self.assertFalse(consensus) + self.assertEqual(freq, 2) def test_current_metadata_finds_pluralities(self): items = [Item({'artist': 'The Beetles', 'album': 'The White Album'}), diff --git a/test/test_db.py b/test/test_db.py index 2729ad235..3d152d765 100644 --- a/test/test_db.py +++ b/test/test_db.py @@ -477,27 +477,6 @@ class AlbumInfoTest(unittest.TestCase): self.assertEqual(ai.album, self.i.album) self.assertEqual(ai.year, self.i.year) - def test_infer_aa_gets_artist_and_id(self): - i = item() - i.albumartist = '' - i.mb_albumartistid = '' - ai = self.lib.add_album((i,), infer_aa=True) - self.assertEqual(ai.albumartist, i.artist) - self.assertEqual(ai.mb_albumartistid, i.mb_artistid) - - def test_no_infer_aa_sets_blank_artist_and_id(self): - i = item() - i.albumartist = '' - i.mb_albumartistid = '' - ai = self.lib.add_album((i,), infer_aa=False) - self.assertEqual(ai.albumartist, '') - self.assertEqual(ai.mb_albumartistid, '') - - def test_infer_aa_lets_album_values_override(self): - ai = self.lib.add_album((self.i,), infer_aa=True) - self.assertEqual(ai.albumartist, self.i.albumartist) - self.assertEqual(ai.mb_albumartistid, self.i.mb_albumartistid) - def test_albuminfo_stores_art(self): ai = self.lib.get_album(self.i) ai.artpath = '/my/great/art' diff --git a/test/test_importer.py b/test/test_importer.py index 08e2f734d..6071603c9 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -138,6 +138,21 @@ class NonAutotaggedImportTest(unittest.TestCase): paths = self._run_import(['sometrack'], singletons=True) self.assertTrue(os.path.exists(paths[0])) +# Utilities for invoking the apply_choices coroutine. +def _call_apply(coros, items, info): + task = importer.ImportTask(None, None, None) + task.is_album = True + task.set_choice((info, items)) + if not isinstance(coros, list): + coros = [coros] + for coro in coros: + task = coro.send(task) +def _call_apply_choice(coro, items, choice): + task = importer.ImportTask(None, None, items) + task.is_album = True + task.set_choice(choice) + coro.send(task) + class ImportApplyTest(unittest.TestCase, _common.ExtraAsserts): def setUp(self): self.libdir = os.path.join(_common.RSRC, 'testlibdir') @@ -169,28 +184,13 @@ class ImportApplyTest(unittest.TestCase, _common.ExtraAsserts): def tearDown(self): shutil.rmtree(self.libdir) - def _call_apply(self, coros, items, info): - task = importer.ImportTask(None, None, None) - task.is_album = True - task.set_choice((info, items)) - if not isinstance(coros, list): - coros = [coros] - for coro in coros: - task = coro.send(task) - - def _call_apply_choice(self, coro, items, choice): - task = importer.ImportTask(None, None, items) - task.is_album = True - task.set_choice(choice) - coro.send(task) - def test_finalize_no_delete(self): config = _common.iconfig(self.lib, delete=False) applyc = importer.apply_choices(config) applyc.next() finalize = importer.finalize(config) finalize.next() - self._call_apply([applyc, finalize], [self.i], self.info) + _call_apply([applyc, finalize], [self.i], self.info) self.assertExists(self.srcpath) def test_finalize_with_delete(self): @@ -199,13 +199,13 @@ class ImportApplyTest(unittest.TestCase, _common.ExtraAsserts): applyc.next() finalize = importer.finalize(config) finalize.next() - self._call_apply([applyc, finalize], [self.i], self.info) + _call_apply([applyc, finalize], [self.i], self.info) self.assertNotExists(self.srcpath) def test_apply_asis_uses_album_path(self): coro = importer.apply_choices(_common.iconfig(self.lib)) coro.next() # Prime coroutine. - self._call_apply_choice(coro, [self.i], importer.action.ASIS) + _call_apply_choice(coro, [self.i], importer.action.ASIS) self.assertExists( os.path.join(self.libdir, self.lib.path_formats['default']+'.mp3') ) @@ -213,7 +213,7 @@ class ImportApplyTest(unittest.TestCase, _common.ExtraAsserts): def test_apply_match_uses_album_path(self): coro = importer.apply_choices(_common.iconfig(self.lib)) coro.next() # Prime coroutine. - self._call_apply(coro, [self.i], self.info) + _call_apply(coro, [self.i], self.info) self.assertExists( os.path.join(self.libdir, self.lib.path_formats['default']+'.mp3') ) @@ -236,11 +236,126 @@ class ImportApplyTest(unittest.TestCase, _common.ExtraAsserts): coro.send(importer.ImportTask.done_sentinel('toppath')) # Just test no exception for now. +class AsIsApplyTest(unittest.TestCase): + def setUp(self): + self.dbpath = os.path.join(_common.RSRC, 'templib.blb') + self.lib = library.Library(self.dbpath) + self.config = _common.iconfig(self.lib, write=False, copy=False) + + # Make an "album" that has a homogenous artist. (Modified by + # individual tests.) + i1 = _common.item() + i2 = _common.item() + i3 = _common.item() + i1.title = 'first item' + i2.title = 'second item' + i3.title = 'third item' + i1.comp = i2.comp = i3.comp = False + i1.albumartist = i2.albumartist = i3.albumartist = '' + self.items = [i1, i2, i3] + + def tearDown(self): + os.remove(self.dbpath) + + def _apply_result(self): + """Run the "apply" coroutine and get the resulting Album.""" + coro = importer.apply_choices(self.config) + coro.next() + _call_apply_choice(coro, self.items, importer.action.ASIS) + + return self.lib.albums()[0] + + def test_asis_homogenous_va_not_set(self): + alb = self._apply_result() + self.assertFalse(alb.comp) + self.assertEqual(alb.albumartist, self.items[2].artist) + + def test_asis_heterogenous_va_set(self): + self.items[0].artist = 'another artist' + self.items[1].artist = 'some other artist' + alb = self._apply_result() + self.assertTrue(alb.comp) + self.assertEqual(alb.albumartist, 'Various Artists') + + def test_asis_majority_artist_va_not_set(self): + self.items[0].artist = 'another artist' + alb = self._apply_result() + self.assertFalse(alb.comp) + self.assertEqual(alb.albumartist, self.items[2].artist) + +class InferAlbumDataTest(unittest.TestCase): + def setUp(self): + self.lib = library.Library(':memory:') + + i1 = _common.item() + i2 = _common.item() + i3 = _common.item() + i1.title = 'first item' + i2.title = 'second item' + i3.title = 'third item' + i1.comp = i2.comp = i3.comp = False + i1.albumartist = i2.albumartist = i3.albumartist = '' + i1.mb_albumartistid = i2.mb_albumartistid = i3.mb_albumartistid = '' + self.items = [i1, i2, i3] + + self.album = self.lib.add_album(self.items) + self.task = importer.ImportTask(path='a path', toppath='top path', + items=self.items) + self.task.set_null_match() + + def test_asis_homogenous_single_artist(self): + self.task.set_choice(importer.action.ASIS) + importer._infer_album_fields(self.album, self.task) + self.assertFalse(self.album.comp) + self.assertEqual(self.album.albumartist, self.items[2].artist) + + def test_asis_heterogenous_va(self): + self.items[0].artist = 'another artist' + self.items[1].artist = 'some other artist' + self.lib.save() + self.task.set_choice(importer.action.ASIS) + + importer._infer_album_fields(self.album, self.task) + + self.assertTrue(self.album.comp) + self.assertEqual(self.album.albumartist, 'Various Artists') + + def test_asis_majority_artist_single_artist(self): + self.items[0].artist = 'another artist' + self.lib.save() + self.task.set_choice(importer.action.ASIS) + + importer._infer_album_fields(self.album, self.task) + + self.assertFalse(self.album.comp) + self.assertEqual(self.album.albumartist, self.items[2].artist) + + def test_apply_gets_artist_and_id(self): + self.task.set_choice(({}, self.items)) # APPLY + + importer._infer_album_fields(self.album, self.task) + + self.assertEqual(self.album.albumartist, self.items[0].artist) + self.assertEqual(self.album.mb_albumartistid, self.items[0].mb_artistid) + + def test_apply_lets_album_values_override(self): + self.album.albumartist = 'some album artist' + self.album.mb_albumartistid = 'some album artist id' + self.task.set_choice(({}, self.items)) # APPLY + + importer._infer_album_fields(self.album, self.task) + + self.assertEqual(self.album.albumartist, + 'some album artist') + self.assertEqual(self.album.mb_albumartistid, + 'some album artist id') + + class DuplicateCheckTest(unittest.TestCase): def setUp(self): self.lib = library.Library(':memory:') self.i = _common.item() - self.album = self.lib.add_album([self.i], True) + self.album = self.lib.add_album([self.i]) def test_duplicate_album(self): res = importer._duplicate_check(self.lib, self.i.albumartist,