From 70f0bc5b6c8b2d0899ec1fe82f4879faebcd9cba Mon Sep 17 00:00:00 2001 From: Meet Udeshi Date: Tue, 31 Oct 2017 11:28:44 +0530 Subject: [PATCH 1/6] Implement album merging for duplicates Fixes #112 --- beets/importer.py | 31 ++++++++++++++++++++++++++++++- beets/ui/commands.py | 4 +++- test/helper.py | 4 +++- test/test_importer.py | 6 ++++++ 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index e7d8b40bc..b9c00c8c7 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -443,6 +443,7 @@ class ImportTask(BaseImportTask): self.candidates = [] self.rec = None self.should_remove_duplicates = False + self.should_merge_duplicates = False self.is_album = True self.search_ids = [] # user-supplied candidate IDs. @@ -1037,6 +1038,10 @@ class ArchiveImportTask(SentinelImportTask): self.extracted = True self.toppath = extract_to +class MergedImportTask(ImportTask): + def __init__(self, toppath, paths, items): + super(MergedImportTask, self).__init__(toppath, paths, items) + class ImportTaskFactory(object): """Generate album and singleton import tasks for all media files @@ -1352,7 +1357,27 @@ def user_query(session, task): ]) return pipeline.multiple(ipl.pull()) - resolve_duplicates(session, task) + if type(task) != MergedImportTask: + resolve_duplicates(session, task) + if task.should_merge_duplicates: + duplicate_items = task.duplicate_items(session.lib) + for item in duplicate_items: + item.id = None + item.album_id = None + + duplicate_paths = [item.path for item in duplicate_items] + + merged_task = MergedImportTask(None, + task.paths + duplicate_paths, + task.items + duplicate_items) + + ipl = pipeline.Pipeline([ + iter([merged_task]), + lookup_candidates(session), + user_query(session) + ]) + return pipeline.multiple(ipl.pull()) + apply_choice(session, task) return task @@ -1373,6 +1398,7 @@ def resolve_duplicates(session, task): u'skip': u's', u'keep': u'k', u'remove': u'r', + u'merge': u'm', u'ask': u'a', }) log.debug(u'default action for duplicates: {0}', duplicate_action) @@ -1386,6 +1412,9 @@ def resolve_duplicates(session, task): elif duplicate_action == u'r': # Remove old. task.should_remove_duplicates = True + elif duplicate_action == u'm': + # Merge duplicates together + task.should_merge_duplicates = True else: # No default action set; ask the session. session.resolve_duplicate(task, found_duplicates) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index a4b433925..c8beb11e2 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -791,7 +791,7 @@ class TerminalImportSession(importer.ImportSession): )) sel = ui.input_options( - (u'Skip new', u'Keep both', u'Remove old') + (u'Skip new', u'Keep both', u'Remove old', u'Merge all') ) if sel == u's': @@ -803,6 +803,8 @@ class TerminalImportSession(importer.ImportSession): elif sel == u'r': # Remove old. task.should_remove_duplicates = True + elif sel == u'm': + task.should_merge_duplicates = True else: assert False diff --git a/test/helper.py b/test/helper.py index ebf039fca..5b3bec8b5 100644 --- a/test/helper.py +++ b/test/helper.py @@ -535,7 +535,7 @@ class TestImportSession(importer.ImportSession): choose_item = choose_match - Resolution = Enum('Resolution', 'REMOVE SKIP KEEPBOTH') + Resolution = Enum('Resolution', 'REMOVE SKIP KEEPBOTH MERGE') default_resolution = 'REMOVE' @@ -553,6 +553,8 @@ class TestImportSession(importer.ImportSession): task.set_choice(importer.action.SKIP) elif res == self.Resolution.REMOVE: task.should_remove_duplicates = True + elif res == self.Resolution.MERGE: + task.should_merge_duplicates = True def generate_album_info(album_id, track_ids): diff --git a/test/test_importer.py b/test/test_importer.py index 1e0dd1fb9..c6b021f33 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -1248,6 +1248,12 @@ class ImportDuplicateAlbumTest(unittest.TestCase, TestHelper, item = self.lib.items().get() self.assertEqual(item.title, u't\xeftle 0') + def test_merge_duplicate_album(self): + self.importer.default_resolution = self.importer.Resolution.MERGE + self.importer.run() + + self.assertEqual(len(self.lib.albums()), 1) + def test_twice_in_import_dir(self): self.skipTest('write me') From 28fef2e77a9d58d1961a4f8f9eb1232996ccaf2a Mon Sep 17 00:00:00 2001 From: Meet Udeshi Date: Wed, 1 Nov 2017 02:24:19 +0530 Subject: [PATCH 2/6] Fix flake8 errors --- beets/importer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index b9c00c8c7..8541a2f95 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1038,6 +1038,7 @@ class ArchiveImportTask(SentinelImportTask): self.extracted = True self.toppath = extract_to + class MergedImportTask(ImportTask): def __init__(self, toppath, paths, items): super(MergedImportTask, self).__init__(toppath, paths, items) @@ -1367,9 +1368,8 @@ def user_query(session, task): duplicate_paths = [item.path for item in duplicate_items] - merged_task = MergedImportTask(None, - task.paths + duplicate_paths, - task.items + duplicate_items) + merged_task = MergedImportTask(None, task.paths + duplicate_paths, + task.items + duplicate_items) ipl = pipeline.Pipeline([ iter([merged_task]), From 4d4fb504d538bcbfb7e4dbefd5efa42bedcdf8cd Mon Sep 17 00:00:00 2001 From: Meet Udeshi Date: Sat, 4 Nov 2017 01:33:19 +0530 Subject: [PATCH 3/6] Change to subset check for finding duplicates also implement repeating code as helper functions --- beets/importer.py | 87 +++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 8541a2f95..94594a2a2 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -633,10 +633,11 @@ class ImportTask(BaseImportTask): )) for album in lib.albums(duplicate_query): - # Check whether the album is identical in contents, in which - # case it is not a duplicate (will be replaced). + # Check whether the album paths are all present in the task + # i.e. album is being completely re-imported by the task, + # in which case it is not a duplicate (will be replaced). album_paths = set(i.path for i in album.items()) - if album_paths != task_paths: + if not (album_paths <= task_paths): duplicates.append(album) return duplicates @@ -1039,11 +1040,6 @@ class ArchiveImportTask(SentinelImportTask): self.toppath = extract_to -class MergedImportTask(ImportTask): - def __init__(self, toppath, paths, items): - super(MergedImportTask, self).__init__(toppath, paths, items) - - class ImportTaskFactory(object): """Generate album and singleton import tasks for all media files indicated by a path. @@ -1231,6 +1227,27 @@ class ImportTaskFactory(object): displayable_path(path), exc) +# Pipeline utilities + +def _freshen_items(items): + # Clear IDs from re-tagged items so they appear "fresh" when + # we add them back to the library. + for item in items: + item.id = None + item.album_id = None + + +def _extend_pipeline(tasks, *stages): + # Return pipeline extension for stages with list of tasks + if type(tasks) == list: + task_iter = iter(tasks) + else: + task_iter = tasks + + ipl = pipeline.Pipeline([task_iter] + list(stages)) + return pipeline.multiple(ipl.pull()) + + # Full-album pipeline stages. def read_tasks(session): @@ -1276,12 +1293,7 @@ def query_tasks(session): log.debug(u'yielding album {0}: {1} - {2}', album.id, album.albumartist, album.album) items = list(album.items()) - - # Clear IDs from re-tagged items so they appear "fresh" when - # we add them back to the library. - for item in items: - item.id = None - item.album_id = None + _freshen_items(items) task = ImportTask(None, [album.item_dir()], items) for task in task.handle_created(session): @@ -1341,42 +1353,29 @@ def user_query(session, task): yield new_task yield SentinelImportTask(task.toppath, task.paths) - ipl = pipeline.Pipeline([ - emitter(task), - lookup_candidates(session), - user_query(session), - ]) - return pipeline.multiple(ipl.pull()) + return _extend_pipeline(emitter(task), + lookup_candidates(session), + user_query(session)) # As albums: group items by albums and create task for each album if task.choice_flag is action.ALBUMS: - ipl = pipeline.Pipeline([ - iter([task]), - group_albums(session), - lookup_candidates(session), - user_query(session) - ]) - return pipeline.multiple(ipl.pull()) + return _extend_pipeline([task], + group_albums(session), + lookup_candidates(session), + user_query(session)) - if type(task) != MergedImportTask: - resolve_duplicates(session, task) - if task.should_merge_duplicates: - duplicate_items = task.duplicate_items(session.lib) - for item in duplicate_items: - item.id = None - item.album_id = None + resolve_duplicates(session, task) + if task.should_merge_duplicates: + duplicate_items = task.duplicate_items(session.lib) + _freshen_items(duplicate_items) + duplicate_paths = [item.path for item in duplicate_items] - duplicate_paths = [item.path for item in duplicate_items] + merged_task = ImportTask(None, task.paths + duplicate_paths, + task.items + duplicate_items) - merged_task = MergedImportTask(None, task.paths + duplicate_paths, - task.items + duplicate_items) - - ipl = pipeline.Pipeline([ - iter([merged_task]), - lookup_candidates(session), - user_query(session) - ]) - return pipeline.multiple(ipl.pull()) + return _extend_pipeline([merged_task], + lookup_candidates(session), + user_query(session)) apply_choice(session, task) return task From 1646da4d9c1d7353d3f7126efdfdbe85bc8d00ac Mon Sep 17 00:00:00 2001 From: Meet Udeshi Date: Sat, 4 Nov 2017 03:08:53 +0530 Subject: [PATCH 4/6] Add comments and explain merge feature in config docs and guide --- beets/importer.py | 5 +++++ docs/guides/tagger.rst | 16 ++++++++++++---- docs/reference/config.rst | 11 ++++++----- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 94594a2a2..4af276abe 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1365,8 +1365,13 @@ def user_query(session, task): user_query(session)) resolve_duplicates(session, task) + if task.should_merge_duplicates: + # Create a new task for tagging the current items + # and duplicates together duplicate_items = task.duplicate_items(session.lib) + + # duplicates would be reimported so make them look "fresh" _freshen_items(duplicate_items) duplicate_paths = [item.path for item in duplicate_items] diff --git a/docs/guides/tagger.rst b/docs/guides/tagger.rst index 4c9df42f8..db11dbcc6 100644 --- a/docs/guides/tagger.rst +++ b/docs/guides/tagger.rst @@ -234,17 +234,25 @@ If beets finds an album or item in your library that seems to be the same as the one you're importing, you may see a prompt like this:: This album is already in the library! - [S]kip new, Keep both, Remove old? + [S]kip new, Keep both, Remove old, Merge all? Beets wants to keep you safe from duplicates, which can be a real pain, so you -have three choices in this situation. You can skip importing the new music, +have four choices in this situation. You can skip importing the new music, choosing to keep the stuff you already have in your library; you can keep both -the old and the new music; or you can remove the existing music and choose the -new stuff. If you choose that last "trump" option, any duplicates will be +the old and the new music; you can remove the existing music and choose the +new stuff; or you can merge the newly imported album and existing duplicate +into one single album. +If you choose that "remove" option, any duplicates will be removed from your library database---and, if the corresponding files are located inside of your beets library directory, the files themselves will be deleted as well. +If you choose "merge", beets will try re-importing the existing and new tracks +as one bundle so they will get tagged together appropriately. +This is particularly helpful when you are importing extra tracks +of an album in your library with missing tracks, so beets will ask you the same +questions as it would if you were importing all tracks at once. + If you choose to keep two identically-named albums, beets can avoid storing both in the same directory. See :ref:`aunique` for details. diff --git a/docs/reference/config.rst b/docs/reference/config.rst index 9faddd2d2..ce45a94ee 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -571,11 +571,12 @@ Default: ``yes``. duplicate_action ~~~~~~~~~~~~~~~~ -Either ``skip``, ``keep``, ``remove``, or ``ask``. Controls how duplicates -are treated in import task. "skip" means that new item(album or track) will be -skipped; "keep" means keep both old and new items; "remove" means remove old -item; "ask" means the user should be prompted for the action each time. -The default is ``ask``. +Either ``skip``, ``keep``, ``remove``, ``merge`` or ``ask``. +Controls how duplicates are treated in import task. +"skip" means that new item(album or track) will be skipped; +"keep" means keep both old and new items; "remove" means remove old +item; "merge" means merge into one album; "ask" means the user +should be prompted for the action each time. The default is ``ask``. .. _bell: From 83f2e4493680b1bfe3430298d8bd6e6e441a5e14 Mon Sep 17 00:00:00 2001 From: Meet Udeshi Date: Sun, 5 Nov 2017 06:20:48 +0530 Subject: [PATCH 5/6] Mark merged items in session for future tasks --- beets/importer.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/beets/importer.py b/beets/importer.py index 4af276abe..7e5191399 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -188,6 +188,8 @@ class ImportSession(object): self.paths = paths self.query = query self._is_resuming = dict() + self._merged_items = set() + self._merged_dirs = set() # Normalize the paths. if self.paths: @@ -350,6 +352,24 @@ class ImportSession(object): self._history_dirs = history_get() return self._history_dirs + def already_merged(self, paths): + """Returns true if all the paths being imported were part of a merge + during previous tasks. + """ + for path in paths: + if path not in self._merged_items \ + and path not in self._merged_dirs: + return False + return True + + def mark_merged(self, paths): + """Mark paths and directories as merged for future reimport tasks. + """ + self._merged_items.update(paths) + dirs = set([os.path.dirname(path) if os.path.isfile(path) else path + for path in paths]) + self._merged_dirs.update(dirs) + def is_resuming(self, toppath): """Return `True` if user wants to resume import of this path. @@ -1339,6 +1359,9 @@ def user_query(session, task): if task.skip: return task + if session.already_merged(task.paths): + return pipeline.BUBBLE + # Ask the user for a choice. task.choose_match(session) plugins.send('import_task_choice', session=session, task=task) @@ -1371,10 +1394,13 @@ def user_query(session, task): # and duplicates together duplicate_items = task.duplicate_items(session.lib) - # duplicates would be reimported so make them look "fresh" + # Duplicates would be reimported so make them look "fresh" _freshen_items(duplicate_items) duplicate_paths = [item.path for item in duplicate_items] + # Record merged paths in the session so they are not reimported + session.mark_merged(duplicate_paths) + merged_task = ImportTask(None, task.paths + duplicate_paths, task.items + duplicate_items) From a6215f30a72976e8db2bd422e4ab842f85c9daaf Mon Sep 17 00:00:00 2001 From: Meet Udeshi Date: Sun, 5 Nov 2017 12:35:32 +0530 Subject: [PATCH 6/6] flake8 fixes --- beets/importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 7e5191399..e91b35656 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -358,7 +358,7 @@ class ImportSession(object): """ for path in paths: if path not in self._merged_items \ - and path not in self._merged_dirs: + and path not in self._merged_dirs: return False return True @@ -367,7 +367,7 @@ class ImportSession(object): """ self._merged_items.update(paths) dirs = set([os.path.dirname(path) if os.path.isfile(path) else path - for path in paths]) + for path in paths]) self._merged_dirs.update(dirs) def is_resuming(self, toppath):