mirror of
https://github.com/beetbox/beets.git
synced 2026-01-03 22:42:44 +01:00
add 'keys' option to allow duplicate matching on arbitrary attributes
- towards addressing #427 - TODO: invert key list - TODO: implement alternative strategies (fp, md5, etc)
This commit is contained in:
parent
2bb2827a16
commit
6ae10ed765
3 changed files with 41 additions and 8 deletions
|
|
@ -23,24 +23,24 @@ PLUGIN = 'duplicates'
|
|||
log = logging.getLogger('beets')
|
||||
|
||||
|
||||
def _group_by_id(objs):
|
||||
"""Return a dictionary whose keys are MBIDs and whose values are
|
||||
lists of objects (Albums or Items) with that ID.
|
||||
def _group_by(objs, keys):
|
||||
"""Return a dictionary whose keys are arbitrary concatenations of attributes
|
||||
and whose values are lists of objects (Albums or Items) with those keys.
|
||||
"""
|
||||
import collections
|
||||
counts = collections.defaultdict(list)
|
||||
for obj in objs:
|
||||
mbid = getattr(obj, 'mb_trackid', obj.mb_albumid)
|
||||
counts[mbid].append(obj)
|
||||
key = '\001'.join(getattr(obj, k, obj.mb_albumid) for k in keys)
|
||||
counts[key].append(obj)
|
||||
return counts
|
||||
|
||||
|
||||
def _duplicates(objs, full):
|
||||
def _duplicates(objs, keys=['mb_trackid'], full=0):
|
||||
"""Generate triples of MBIDs, duplicate counts, and constituent
|
||||
objects.
|
||||
"""
|
||||
offset = 0 if full else 1
|
||||
for mbid, objs in _group_by_id(objs).iteritems():
|
||||
for mbid, objs in _group_by(objs, keys).iteritems():
|
||||
if len(objs) > 1:
|
||||
yield (mbid, len(objs) - offset, objs[offset:])
|
||||
|
||||
|
|
@ -80,13 +80,19 @@ class DuplicatesPlugin(BeetsPlugin):
|
|||
help='show all versions of duplicate\
|
||||
tracks or albums')
|
||||
|
||||
self._command.parser.add_option('-k', '--keys', dest='keys',
|
||||
type=str, default='mb_trackid',
|
||||
help='report duplicates based on keys')
|
||||
|
||||
def commands(self):
|
||||
def _dup(lib, opts, args):
|
||||
opts.keys = opts.keys.split(',')
|
||||
self.config.set_args(opts)
|
||||
fmt = self.config['format'].get()
|
||||
count = self.config['count'].get()
|
||||
album = self.config['album'].get()
|
||||
full = self.config['full'].get()
|
||||
keys = self.config['keys'].get()
|
||||
|
||||
if album:
|
||||
items = lib.albums(decargs(args))
|
||||
|
|
@ -101,7 +107,9 @@ class DuplicatesPlugin(BeetsPlugin):
|
|||
fmt = '$albumartist - $album - $title'
|
||||
fmt += ': {0}'
|
||||
|
||||
for obj_id, obj_count, objs in _duplicates(items, full):
|
||||
for obj_id, obj_count, objs in _duplicates(items,
|
||||
keys=keys,
|
||||
full=full):
|
||||
if obj_id: # Skip empty IDs.
|
||||
for o in objs:
|
||||
print_obj(o, lib, fmt=fmt.format(obj_count))
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@ Changelog
|
|||
|
||||
New features:
|
||||
|
||||
* :doc:`/plugins/duplicates`: The new ``keys`` option allows you to specify
|
||||
arbitrary fields over which to consider potential duplicates.
|
||||
|
||||
* :doc:`/plugins/lastgenre`: The new ``multiple`` option has been replaced
|
||||
with the ``count`` option, which lets you limit the number of genres added
|
||||
to your music. (No more thousand-character genre fields!) Also, the
|
||||
|
|
|
|||
|
|
@ -31,6 +31,9 @@ config file::
|
|||
count: no
|
||||
album: no
|
||||
full: no
|
||||
keys:
|
||||
- mb_trackid
|
||||
- album
|
||||
|
||||
or on the command-line::
|
||||
|
||||
|
|
@ -42,6 +45,8 @@ or on the command-line::
|
|||
of tracks
|
||||
-F, --full show all versions of duplicate
|
||||
tracks or albums
|
||||
-k KEYS, --keys=KEYS report duplicates based on keys
|
||||
|
||||
|
||||
format
|
||||
~~~~~~
|
||||
|
|
@ -72,6 +77,17 @@ full
|
|||
The ``full`` option (default: false) lists every track or album that
|
||||
has duplicates, not just the duplicates themselves.
|
||||
|
||||
keys
|
||||
~~~~
|
||||
|
||||
The ``keys`` option (default: ``mb_trackid``) defines which track or
|
||||
album fields are compared when searching for duplicates. By default, the plugin
|
||||
only uses the MusicBrainz track or album ID for this purpose. Using the
|
||||
``keys`` option (as a YAML list in the configuration file, or a
|
||||
comma-delimited string on the command line), you can extend this behavior
|
||||
to consider other attributes.
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
|
|
@ -97,9 +113,15 @@ The same as the above but include the original album, and show the path::
|
|||
beet duplicates -acf '$path'
|
||||
|
||||
|
||||
Get rid of false positives arising from the same track existing in different albums::
|
||||
|
||||
beet duplicates -k mb_trackid,album
|
||||
|
||||
TODO
|
||||
----
|
||||
|
||||
- Allow deleting duplicates.
|
||||
- Provide an option to invert the key selection.
|
||||
- Provide additional strategies for duplicate finding (fingerprint, hash, etc.)
|
||||
|
||||
.. _spark: https://github.com/holman/spark
|
||||
|
|
|
|||
Loading…
Reference in a new issue