diff --git a/beetsplug/duplicates.py b/beetsplug/duplicates.py index 0f4688d94..ca0b506b7 100644 --- a/beetsplug/duplicates.py +++ b/beetsplug/duplicates.py @@ -23,24 +23,24 @@ PLUGIN = 'duplicates' log = logging.getLogger('beets') -def _group_by_id(objs): - """Return a dictionary whose keys are MBIDs and whose values are - lists of objects (Albums or Items) with that ID. +def _group_by(objs, keys): + """Return a dictionary whose keys are arbitrary concatenations of attributes + and whose values are lists of objects (Albums or Items) with those keys. """ import collections counts = collections.defaultdict(list) for obj in objs: - mbid = getattr(obj, 'mb_trackid', obj.mb_albumid) - counts[mbid].append(obj) + key = '\001'.join(getattr(obj, k, obj.mb_albumid) for k in keys) + counts[key].append(obj) return counts -def _duplicates(objs, full): +def _duplicates(objs, keys=['mb_trackid'], full=0): """Generate triples of MBIDs, duplicate counts, and constituent objects. """ offset = 0 if full else 1 - for mbid, objs in _group_by_id(objs).iteritems(): + for mbid, objs in _group_by(objs, keys).iteritems(): if len(objs) > 1: yield (mbid, len(objs) - offset, objs[offset:]) @@ -80,13 +80,19 @@ class DuplicatesPlugin(BeetsPlugin): help='show all versions of duplicate\ tracks or albums') + self._command.parser.add_option('-k', '--keys', dest='keys', + type=str, default='mb_trackid', + help='report duplicates based on keys') + def commands(self): def _dup(lib, opts, args): + opts.keys = opts.keys.split(',') self.config.set_args(opts) fmt = self.config['format'].get() count = self.config['count'].get() album = self.config['album'].get() full = self.config['full'].get() + keys = self.config['keys'].get() if album: items = lib.albums(decargs(args)) @@ -101,7 +107,9 @@ class DuplicatesPlugin(BeetsPlugin): fmt = '$albumartist - $album - $title' fmt += ': {0}' - for obj_id, obj_count, objs in _duplicates(items, full): + for obj_id, obj_count, objs in _duplicates(items, + keys=keys, + 
full=full): if obj_id: # Skip empty IDs. for o in objs: print_obj(o, lib, fmt=fmt.format(obj_count)) diff --git a/docs/changelog.rst b/docs/changelog.rst index 922d36b4d..b3679293d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -6,6 +6,9 @@ Changelog New features: +* :doc:`/plugins/duplicates`: The new ``keys`` option allows you to specify + arbitrary fields to compare when detecting potential duplicates. + * :doc:`/plugins/lastgenre`: The new ``multiple`` option has been replaced with the ``count`` option, which lets you limit the number of genres added to your music. (No more thousand-character genre fields!) Also, the diff --git a/docs/plugins/duplicates.rst b/docs/plugins/duplicates.rst index 68edbf325..c344d50fc 100644 --- a/docs/plugins/duplicates.rst +++ b/docs/plugins/duplicates.rst @@ -31,6 +31,9 @@ config file:: count: no album: no full: no + keys: + - mb_trackid + - album or on the command-line:: @@ -42,6 +45,8 @@ or on the command-line:: of tracks -F, --full show all versions of duplicate tracks or albums + -k KEYS, --keys=KEYS report duplicates based on keys + format ~~~~~~ @@ -72,6 +77,17 @@ full The ``full`` option (default: false) lists every track or album that has duplicates, not just the duplicates themselves. +keys +~~~~ + +The ``keys`` option (default: ``mb_trackid``) defines which track +or album fields are compared to find duplicates. By default, the plugin +only uses the MusicBrainz track or album ID for this purpose. Using the +``keys`` option (as a YAML list in the configuration file, or a +comma-delimited string on the command line), you can extend this behavior +to consider other attributes. + + Examples -------- @@ -97,9 +113,15 @@ The same as the above but include the original album, and show the path:: beet duplicates -acf '$path' +Get rid of false positives arising from the same track existing in different albums:: + + beet duplicates -k mb_trackid,album + TODO ---- - Allow deleting duplicates. 
+- Provide an option to invert the key selection. +- Provide additional strategies for finding duplicates (fingerprint, hash, etc.). .. _spark: https://github.com/holman/spark