mirror of
https://github.com/beetbox/beets.git
synced 2025-12-28 19:42:42 +01:00
duplicate trumping: remove items & delete files
Based on the "remove_duplicates" flag on ImportTask, the apply_choices coroutine now looks for duplicates (using an extended version of the _duplicate_check functions) and removes items from the library. It also *deletes* files associated with those items when they are located inside the beets library directory. Files outside of the directory are left on disk (but their DB entry is still removed). This should "do the right thing" in most cases -- again, this is something we can add a config option for if it comes up.
This commit is contained in:
parent
34ec25e642
commit
24cdf2a72e
2 changed files with 80 additions and 72 deletions
|
|
@ -97,32 +97,18 @@ def _reopen_lib(lib):
|
|||
else:
|
||||
return lib
|
||||
|
||||
def _duplicate_check(lib, task, recent=None):
|
||||
"""Check whether an album already exists in the library. `recent`
|
||||
should be a set of (artist, album) pairs that will be built up
|
||||
with every call to this function and checked along with the
|
||||
library.
|
||||
def _duplicate_check(lib, task):
|
||||
"""Check whether an album already exists in the library. Returns a
|
||||
list of Album objects (empty if no duplicates are found).
|
||||
"""
|
||||
if task.choice_flag is action.ASIS:
|
||||
artist = task.cur_artist
|
||||
album = task.cur_album
|
||||
elif task.choice_flag is action.APPLY:
|
||||
artist = task.info.artist
|
||||
album = task.info.album
|
||||
else:
|
||||
return False
|
||||
assert task.choice_flag in (action.ASIS, action.APPLY)
|
||||
artist, album = task.chosen_ident()
|
||||
|
||||
if artist is None:
|
||||
# As-is import with no artist. Skip check.
|
||||
return False
|
||||
return []
|
||||
|
||||
# Try the recent albums.
|
||||
if recent is not None:
|
||||
if (artist, album) in recent:
|
||||
return True
|
||||
recent.add((artist, album))
|
||||
|
||||
# Look in the library.
|
||||
found_albums = []
|
||||
cur_paths = set(i.path for i in task.items if i)
|
||||
for album_cand in lib.albums(artist=artist):
|
||||
if album_cand.album == album:
|
||||
|
|
@ -131,34 +117,23 @@ def _duplicate_check(lib, task, recent=None):
|
|||
other_paths = set(i.path for i in album_cand.items())
|
||||
if other_paths == cur_paths:
|
||||
continue
|
||||
return True
|
||||
found_albums.append(album_cand)
|
||||
return found_albums
|
||||
|
||||
return False
|
||||
def _item_duplicate_check(lib, task):
|
||||
"""Check whether an item already exists in the library. Returns a
|
||||
list of Item objects.
|
||||
"""
|
||||
assert task.choice_flag in (action.ASIS, action.APPLY)
|
||||
artist, title = task.chosen_ident()
|
||||
|
||||
def _item_duplicate_check(lib, task, recent=None):
|
||||
"""Check whether an item already exists in the library."""
|
||||
if task.choice_flag is action.ASIS:
|
||||
artist = task.item.artist
|
||||
title = task.item.title
|
||||
elif task.choice_flag is action.APPLY:
|
||||
artist = task.info.artist
|
||||
title = task.info.title
|
||||
else:
|
||||
return False
|
||||
|
||||
# Try recent items.
|
||||
if recent is not None:
|
||||
if (artist, title) in recent:
|
||||
return True
|
||||
recent.add((artist, title))
|
||||
|
||||
# Check the library.
|
||||
found_items = []
|
||||
for other_item in lib.items(artist=artist, title=title):
|
||||
# Existing items not considered duplicates.
|
||||
if other_item.path == task.item.path:
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
found_items.append(other_item)
|
||||
return found_items
|
||||
|
||||
def _infer_album_fields(task):
|
||||
"""Given an album and an associated import task, massage the
|
||||
|
|
@ -436,6 +411,26 @@ class ImportTask(object):
|
|||
"""
|
||||
return self.sentinel or self.choice_flag == action.SKIP
|
||||
|
||||
# Useful data.
|
||||
def chosen_ident(self):
|
||||
"""Returns identifying metadata about the current choice. For
|
||||
albums, this is an (artist, album) pair. For items, this is
|
||||
(artist, title). May only be called when the choice flag is ASIS
|
||||
(in which case the data comes from the files' current metadata)
|
||||
or APPLY (data comes from the choice).
|
||||
"""
|
||||
assert self.choice_flag in (action.ASIS, action.APPLY)
|
||||
if self.is_album:
|
||||
if self.choice_flag is action.ASIS:
|
||||
return (self.cur_artist, self.cur_album)
|
||||
elif self.choice_flag is action.APPLY:
|
||||
return (self.info.artist, self.info.album)
|
||||
else:
|
||||
if self.choice_flag is action.ASIS:
|
||||
return (self.item.artist, self.item.title)
|
||||
elif self.choice_flag is action.APPLY:
|
||||
return (self.info.artist, self.info.title)
|
||||
|
||||
|
||||
# Full-album pipeline stages.
|
||||
|
||||
|
|
@ -589,9 +584,15 @@ def user_query(config):
|
|||
continue
|
||||
|
||||
# Check for duplicates if we have a match (or ASIS).
|
||||
if _duplicate_check(lib, task, recent):
|
||||
config.resolve_duplicate_func(task, config)
|
||||
log_choice(config, task, True)
|
||||
if task.choice_flag in (action.ASIS, action.APPLY):
|
||||
ident = task.chosen_ident()
|
||||
# The "recent" set keeps track of identifiers for recently
|
||||
# imported albums -- those that haven't reached the database
|
||||
# yet.
|
||||
if ident in recent or _duplicate_check(lib, task):
|
||||
config.resolve_duplicate_func(task, config)
|
||||
log_choice(config, task, True)
|
||||
recent.add(ident)
|
||||
|
||||
def show_progress(config):
|
||||
"""This stage replaces the initial_lookup and user_query stages
|
||||
|
|
@ -638,9 +639,9 @@ def apply_choices(config):
|
|||
if task.is_album:
|
||||
_infer_album_fields(task)
|
||||
|
||||
# Find existing item entries that these are replacing. Old
|
||||
# album structures are automatically cleaned up when the
|
||||
# last item is removed.
|
||||
# Find existing item entries that these are replacing (for
|
||||
# re-imports). Old album structures are automatically cleaned up
|
||||
# when the last item is removed.
|
||||
replaced_items = defaultdict(list)
|
||||
for item in items:
|
||||
dup_items = lib.items(library.MatchQuery('path', item.path))
|
||||
|
|
@ -651,6 +652,28 @@ def apply_choices(config):
|
|||
log.debug('%i of %i items replaced' % (len(replaced_items),
|
||||
len(items)))
|
||||
|
||||
# Find old items that should be replaced as part of a duplicate
|
||||
# resolution.
|
||||
duplicate_items = []
|
||||
if task.remove_duplicates:
|
||||
if task.is_album:
|
||||
for album in _duplicate_check(lib, task):
|
||||
duplicate_items += album.items()
|
||||
else:
|
||||
duplicate_items = _item_duplicate_check(lib, task)
|
||||
log.debug('removing %i old duplicated items' %
|
||||
len(duplicate_items))
|
||||
|
||||
# Delete duplicate files that are located inside the library
|
||||
# directory.
|
||||
for duplicate_path in [i.path for i in duplicate_items]:
|
||||
if lib.directory in util.ancestry(duplicate_path):
|
||||
log.debug(u'deleting replaced duplicate %s' %
|
||||
util.displayable_path(duplicate_path))
|
||||
util.soft_remove(duplicate_path)
|
||||
util.prune_dirs(os.path.dirname(duplicate_path),
|
||||
lib.directory)
|
||||
|
||||
# Move/copy files.
|
||||
task.old_paths = [item.path for item in items]
|
||||
for item in items:
|
||||
|
|
@ -674,6 +697,8 @@ def apply_choices(config):
|
|||
for replaced in replaced_items.itervalues():
|
||||
for item in replaced:
|
||||
lib.remove(item)
|
||||
for item in duplicate_items:
|
||||
lib.remove(item)
|
||||
|
||||
# Add new ones.
|
||||
if task.is_album:
|
||||
|
|
@ -788,9 +813,12 @@ def item_query(config):
|
|||
log_choice(config, task)
|
||||
|
||||
# Duplicate check.
|
||||
if _item_duplicate_check(lib, task, recent):
|
||||
config.resolve_duplicate_func(task, config)
|
||||
log_choice(config, task, True)
|
||||
if task.choice_flag in (action.ASIS, action.APPLY):
|
||||
ident = task.chosen_ident()
|
||||
if ident in recent or _item_duplicate_check(lib, task):
|
||||
config.resolve_duplicate_func(task, config)
|
||||
log_choice(config, task, True)
|
||||
recent.add(ident)
|
||||
|
||||
def item_progress(config):
|
||||
"""Skips the lookup and query stages in a non-autotagged singleton
|
||||
|
|
|
|||
|
|
@ -678,26 +678,6 @@ class DuplicateCheckTest(unittest.TestCase):
|
|||
self._item_task(True, 'xxx', 'yyy'))
|
||||
self.assertFalse(res)
|
||||
|
||||
def test_recent_item(self):
|
||||
recent = set()
|
||||
importer._item_duplicate_check(self.lib,
|
||||
self._item_task(False, 'xxx', 'yyy'),
|
||||
recent)
|
||||
res = importer._item_duplicate_check(self.lib,
|
||||
self._item_task(False, 'xxx', 'yyy'),
|
||||
recent)
|
||||
self.assertTrue(res)
|
||||
|
||||
def test_recent_album(self):
|
||||
recent = set()
|
||||
importer._duplicate_check(self.lib,
|
||||
self._album_task(False, 'xxx', 'yyy'),
|
||||
recent)
|
||||
res = importer._duplicate_check(self.lib,
|
||||
self._album_task(False, 'xxx', 'yyy'),
|
||||
recent)
|
||||
self.assertTrue(res)
|
||||
|
||||
def test_duplicate_album_existing(self):
|
||||
res = importer._duplicate_check(self.lib,
|
||||
self._album_task(False, existing=True))
|
||||
|
|
|
|||
Loading…
Reference in a new issue