mirror of
https://github.com/beetbox/beets.git
synced 2026-01-30 20:13:37 +01:00
Add new option 'checksum' for specifying arbitrary checksumming commands
This commit is contained in:
parent
587de12ecc
commit
3379c31f4f
3 changed files with 73 additions and 6 deletions
|
|
@ -14,15 +14,43 @@
|
|||
|
||||
"""List duplicate tracks or albums.
|
||||
"""
|
||||
import shlex
|
||||
import logging
|
||||
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets.ui import decargs, print_obj, vararg_callback, Subcommand
|
||||
from beets.util import command_output, displayable_path
|
||||
|
||||
PLUGIN = 'duplicates'
|
||||
log = logging.getLogger('beets')
|
||||
|
||||
|
||||
def _checksum(item, prog):
    """Run the external command `prog` on the file path associated with
    `item` and return a ``(key, checksum)`` tuple.

    `prog` is a command template in which ``{file}`` is replaced by
    ``item.path``. `key` is the first word of the resulting command
    (i.e. the program name). The command's output is set on the item as
    an attribute named `key` and stored, so the command is only run
    when the item has no truthy value under that attribute yet.

    If running the command (or storing its result) fails, the error is
    logged at debug level and `checksum` is returned as ``False``.
    """
    # Tokenize the template *before* substituting the path. Substituting
    # first would hand the path itself to shlex, so any whitespace or
    # quote character in a file name would split (or break) the argument.
    args = [arg.format(file=item.path) for arg in shlex.split(prog)]
    key = args[0]
    checksum = getattr(item, key, False)

    if checksum:
        log.debug('%s: key %s on item %s cached: not computing checksum',
                  PLUGIN, key, displayable_path(item.path))
        return key, checksum

    log.debug('%s: key %s on item %s not cached: computing checksum',
              PLUGIN, key, displayable_path(item.path))
    try:
        checksum = command_output(args)
        setattr(item, key, checksum)
        item.store()
        log.info('%s: computed checksum for %s using %s',
                 PLUGIN, item.title, key)
    except Exception as e:
        # Deliberately best-effort: one item that cannot be checksummed
        # must not abort the whole duplicates run.
        log.debug('%s: failed to checksum %s: %s',
                  PLUGIN, displayable_path(item.path), e)
    return key, checksum
|
||||
|
||||
|
||||
def _group_by(objs, keys):
|
||||
"""Return a dictionary whose keys are arbitrary concatenations of attributes
|
||||
and whose values are lists of objects (Albums or Items) with those keys.
|
||||
|
|
@ -30,7 +58,7 @@ def _group_by(objs, keys):
|
|||
import collections
|
||||
counts = collections.defaultdict(list)
|
||||
for obj in objs:
|
||||
key = '\001'.join(getattr(obj, k, obj.mb_albumid) for k in keys)
|
||||
key = '\001'.join(getattr(obj, k, '') for k in keys)
|
||||
counts[key].append(obj)
|
||||
return counts
|
||||
|
||||
|
|
@ -55,7 +83,9 @@ class DuplicatesPlugin(BeetsPlugin):
|
|||
self.config.add({'count': False})
|
||||
self.config.add({'album': False})
|
||||
self.config.add({'full': False})
|
||||
self.config.add({'path': False})
|
||||
self.config.add({'keys': ['mb_trackid', 'mb_albumid']})
|
||||
self.config.add({'checksum': 'ffmpeg -i {file} -f crc -'})
|
||||
|
||||
self._command = Subcommand('duplicates',
|
||||
help=__doc__,
|
||||
|
|
@ -91,6 +121,11 @@ class DuplicatesPlugin(BeetsPlugin):
|
|||
callback=vararg_callback,
|
||||
help='report duplicates based on keys')
|
||||
|
||||
self._command.parser.add_option('-C', '--checksum', dest='checksum',
|
||||
action='store',
|
||||
help='report duplicates based on\
|
||||
arbitrary command')
|
||||
|
||||
def commands(self):
|
||||
def _dup(lib, opts, args):
|
||||
self.config.set_args(opts)
|
||||
|
|
@ -99,8 +134,10 @@ class DuplicatesPlugin(BeetsPlugin):
|
|||
album = self.config['album'].get()
|
||||
full = self.config['full'].get()
|
||||
keys = self.config['keys'].get()
|
||||
checksum = self.config['checksum'].get()
|
||||
|
||||
if album:
|
||||
keys = ['mb_albumid']
|
||||
items = lib.albums(decargs(args))
|
||||
else:
|
||||
items = lib.items(decargs(args))
|
||||
|
|
@ -116,6 +153,11 @@ class DuplicatesPlugin(BeetsPlugin):
|
|||
fmt = '$albumartist - $album - $title'
|
||||
fmt += ': {0}'
|
||||
|
||||
if checksum:
|
||||
for i in items:
|
||||
k, _ = _checksum(i, checksum)
|
||||
keys = ['k']
|
||||
|
||||
for obj_id, obj_count, objs in _duplicates(items,
|
||||
keys=keys,
|
||||
full=full):
|
||||
|
|
|
|||
|
|
@ -11,7 +11,9 @@ New features:
|
|||
``callback=beets.ui.varargs_callback`` and a variable number of arguments.
|
||||
|
||||
* :doc:`/plugins/duplicates`: The new ``keys`` option allows you to specify
|
||||
arbitrary fields over which to consider potential duplicates.
|
||||
arbitrary fields over which to consider potential duplicates. The new
|
||||
``checksum`` option allows the use of any arbitrary program to checksum
|
||||
items as an alternative duplicate identification strategy.
|
||||
|
||||
* :doc:`/plugins/lastgenre`: The new ``multiple`` option has been replaced
|
||||
with the ``count`` option, which lets you limit the number of genres added
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ config file::
|
|||
keys:
|
||||
- mb_trackid
|
||||
- album
|
||||
checksum: 'ffmpeg -i {file} -f crc -'
|
||||
|
||||
or on the command-line::
|
||||
|
||||
|
|
@ -45,7 +46,12 @@ or on the command-line::
|
|||
of tracks
|
||||
-F, --full show all versions of duplicate
|
||||
tracks or albums
|
||||
-k KEYS, --keys=KEYS report duplicates based on keys
|
||||
-p, --path print paths for matched items
|
||||
or albums
|
||||
-k, --keys report duplicates based on keys
|
||||
-C CHECKSUM, --checksum=CHECKSUM
|
||||
report duplicates based on
|
||||
arbitrary command
|
||||
|
||||
|
||||
format
|
||||
|
|
@ -57,6 +63,11 @@ album. This uses the same template syntax as beets’ :doc:`path formats
|
|||
</reference/pathformat>`. The usage is inspired by, and therefore
|
||||
similar to, the :ref:`list <list-cmd>` command.
|
||||
|
||||
path
|
||||
~~~~
|
||||
|
||||
Convenience wrapper for ``-f \$path``.
|
||||
|
||||
count
|
||||
~~~~~
|
||||
|
||||
|
|
@ -80,13 +91,21 @@ has duplicates, not just the duplicates themselves.
|
|||
keys
|
||||
~~~~
|
||||
|
||||
The ``keys`` option (default: ``mb_trackid``) defines in which track
|
||||
The ``keys`` option (default: ``[mb_trackid, mb_albumid]``) defines in which track
|
||||
or album fields duplicates are to be searched. By default, the plugin
|
||||
uses the musicbrainz track and album IDs for this purpose. Using the
|
||||
``keys`` option (as a YAML list in the configuration file, or as
|
||||
space-delimited strings in the command-line), you can extend this behavior
|
||||
to consider other attributes.
|
||||
|
||||
checksum
|
||||
~~~~~~~~
|
||||
|
||||
The ``checksum`` option (default: ``ffmpeg -i {file} -f crc -``) enables the use of
|
||||
any arbitrary command to compute a checksum of items. It overrides the ``keys``
|
||||
option the first time it is run; however, because it caches the resulting checksums
|
||||
as ``flexattrs`` in the database, you can use
|
||||
``--keys=name_of_the_checksumming_program any_other_keys`` the second time around.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
|
@ -112,16 +131,20 @@ The same as the above but include the original album, and show the path::
|
|||
|
||||
beet duplicates -acf '$path'
|
||||
|
||||
|
||||
Get tracks with the same title, artist, and album::
|
||||
|
||||
beet duplicates -k title albumartist album
|
||||
|
||||
Compute Adler-32 CRC or MD5 checksums, storing them as flexattrs, and report back
|
||||
duplicates based on those values::
|
||||
|
||||
beet dup -C 'ffmpeg -i {file} -f crc -'
|
||||
beet dup -C 'md5sum {file}'
|
||||
|
||||
TODO
|
||||
----
|
||||
|
||||
- Allow deleting duplicates.
|
||||
- Provide option to invert key selection.
|
||||
- Provide additional strategies for duplicate finding (fingerprint, hash, etc.)
|
||||
|
||||
.. _spark: https://github.com/holman/spark
|
||||
|
|
|
|||
Loading…
Reference in a new issue