fix #1000: provide --strict option (don't match on null attributes)

This commit is contained in:
Pedro Silva 2015-03-27 10:46:09 +01:00
parent 86559bcb1a
commit 66e06baca7
2 changed files with 30 additions and 8 deletions

View file

@ -74,30 +74,37 @@ def _checksum(item, prog, log):
return key, checksum
def _group_by(objs, keys, log):
def _group_by(objs, keys, strict, log):
"""Return a dictionary with keys arbitrary concatenations of attributes and
values lists of objects (Albums or Items) with those keys.
If strict, all attributes must be defined for a duplicate match.
"""
import collections
counts = collections.defaultdict(list)
for obj in objs:
values = [getattr(obj, k, None) for k in keys]
values = [v for v in values if v not in (None, '')]
if values:
if strict and len(values) < len(keys):
log.debug(u'{0}: some keys {1} on item {2} are null or empty: '
'skipping',
PLUGIN, keys, displayable_path(obj.path))
elif (not strict and not len(values)):
log.debug(u'{0}: all keys {1} on item {2} are null or empty: '
'skipping',
PLUGIN, keys, displayable_path(obj.path))
else:
key = '\001'.join(values)
counts[key].append(obj)
else:
log.debug(u'{0}: all keys {1} on item {2} are null: skipping',
PLUGIN, keys, displayable_path(obj.path))
return counts
def _duplicates(objs, keys, full, strict, log):
    """Yield a ``(key, count, members)`` triple for each set of duplicates.

    Objects are grouped with ``_group_by`` (``keys``, ``strict`` and ``log``
    are passed straight through); only groups holding more than one object
    are reported. Unless ``full`` is true, the first object of each group
    is left out of both the count and the yielded list.
    """
    skip = 0 if full else 1
    grouped = _group_by(objs, keys, strict, log)
    for group_key, members in grouped.iteritems():
        if len(members) <= 1:
            # A lone object has no duplicates to report.
            continue
        yield (group_key, len(members) - skip, members[skip:])
@ -113,6 +120,7 @@ class DuplicatesPlugin(BeetsPlugin):
'count': False,
'album': False,
'full': False,
'strict': False,
'path': False,
'keys': ['mb_trackid', 'mb_albumid'],
'checksum': None,
@ -144,6 +152,11 @@ class DuplicatesPlugin(BeetsPlugin):
help='show all versions of duplicate'
' tracks or albums')
self._command.parser.add_option('-s', '--strict', dest='strict',
action='store_true',
help='report duplicates only if all'
' attributes are set')
self._command.parser.add_option('-k', '--keys', dest='keys',
action='callback', metavar='KEY1 KEY2',
callback=vararg_callback,
@ -170,6 +183,7 @@ class DuplicatesPlugin(BeetsPlugin):
fmt = self.config['format'].get()
album = self.config['album'].get(bool)
full = self.config['full'].get(bool)
strict = self.config['strict'].get(bool)
keys = self.config['keys'].get()
checksum = self.config['checksum'].get()
copy = self.config['copy'].get()
@ -206,6 +220,7 @@ class DuplicatesPlugin(BeetsPlugin):
for obj_id, obj_count, objs in _duplicates(items,
keys=keys,
full=full,
strict=strict,
log=self._log):
if obj_id: # Skip empty IDs.
for o in objs:

View file

@ -27,6 +27,7 @@ duplicates themselves via command-line switches ::
report duplicates based on arbitrary command
-d, --delete delete items from library and disk
-F, --full show all versions of duplicate tracks or albums
-s, --strict report duplicates only if all attributes are set
-k, --keys report duplicates based on keys
-m DEST, --move=DEST move items to dest
-o DEST, --copy=DEST copy items to dest
@ -64,7 +65,10 @@ file. The available options mirror the command-line options:
Default: :ref:`format_item`
- **full**: List every track or album that has duplicates, not just the
duplicates themselves.
Default: ``no``.
Default: ``no``
- **strict**: Do not report duplicate matches if some of the
attributes are not defined (i.e., null or empty).
Default: ``no``
- **keys**: Define in which track or album fields duplicates are to be
searched. By default, the plugin uses the musicbrainz track and album IDs for
this purpose. Using the ``keys`` option (as a YAML list in the configuration
@ -131,5 +135,8 @@ Tag duplicate items with some flag::
beet duplicates --tag dup=1
Ignore items with undefined keys::
beet duplicates --strict
.. _spark: https://github.com/holman/spark