diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 428de312a..f50ce109e 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -633,13 +633,14 @@ def command_output(cmd, shell=False): Python 2.6 and which can have problems if lots of output is sent to stderr. """ - with open(os.devnull, 'wb') as devnull: - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=devnull, - close_fds=platform.system() != 'Windows', - shell=shell) - stdout, _ = proc.communicate() + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=platform.system() != 'Windows', + shell=shell) + stdout, stderr = proc.communicate() + if proc.returncode: - raise subprocess.CalledProcessError(proc.returncode, cmd) + raise subprocess.CalledProcessError(proc.returncode, cmd, stderr) return stdout diff --git a/beets/util/artresizer.py b/beets/util/artresizer.py index 21a2135b4..566d63e77 100644 --- a/beets/util/artresizer.py +++ b/beets/util/artresizer.py @@ -18,6 +18,7 @@ public resizing proxy if neither is available. import urllib import subprocess import os +import re from tempfile import NamedTemporaryFile import logging from beets import util @@ -76,7 +77,7 @@ def pil_resize(maxwidth, path_in, path_out=None): def im_resize(maxwidth, path_in, path_out=None): """Resize using ImageMagick's ``convert`` tool. - tool. Return the output path of resized image. + Return the output path of resized image. """ path_out = path_out or temp_file_for(path_in) log.debug(u'artresizer: ImageMagick resizing {0} to {1}'.format( @@ -132,8 +133,9 @@ class ArtResizer(object): """Create a resizer object for the given method or, if none is specified, with an inferred method. """ - self.method = method or self._guess_method() + self.method = self._check_method(method) log.debug(u"artresizer: method is {0}".format(self.method)) + self.can_compare = self._can_compare() def resize(self, maxwidth, path_in, path_out=None): """Manipulate an image file according to the method, returning a @@ -163,26 +165,44 @@ class ArtResizer(object): """ return self.method in BACKEND_FUNCS + def _can_compare(self): + """A boolean indicating whether image comparison is available""" + return self.method[0] == IMAGEMAGICK and self.method[1] > (6, 8, 7) + @staticmethod - def _guess_method(): - """Determine which resizing method to use. Returns PIL, - IMAGEMAGICK, or WEBPROXY depending on available dependencies. + def _check_method(method=None): + """A tuple indicating whether current method is available and its version. + If no method is given, it returns a supported one. """ - # Try importing PIL. - try: - __import__('PIL', fromlist=['Image']) - return PIL - except ImportError: - pass + # Guess available method + if not method: + for m in [IMAGEMAGICK, PIL]: + _, version = ArtResizer._check_method(m) + if version: + return (m, version) + return (WEBPROXY, (0)) - # Try invoking ImageMagick's "convert". - try: - out = util.command_output(['convert', '--version']) - if 'imagemagick' in out.lower(): - # system32/convert.exe may be interfering - return IMAGEMAGICK - except (subprocess.CalledProcessError, OSError): - pass + if method == IMAGEMAGICK: + # Try invoking ImageMagick's "convert". + try: + out = util.command_output(['convert', '--version']) + if 'imagemagick' in out.lower(): + pattern = r".+ (\d+)\.(\d+)\.(\d+).*" + match = re.search(pattern, out) + if match: + return (IMAGEMAGICK, + (int(match.group(1)), + int(match.group(2)), + int(match.group(3)))) + return (IMAGEMAGICK, (0)) - # Fall back to Web proxy method. - return WEBPROXY + except (subprocess.CalledProcessError, OSError): + return (IMAGEMAGICK, None) + + if method == PIL: + # Try importing PIL. + try: + __import__('PIL', fromlist=['Image']) + return (PIL, (0)) + except ImportError: + return (PIL, None) diff --git a/beetsplug/embedart.py b/beetsplug/embedart.py index c712081ee..0289f7362 100644 --- a/beetsplug/embedart.py +++ b/beetsplug/embedart.py @@ -16,6 +16,8 @@ import os.path import logging import imghdr +import subprocess +from tempfile import NamedTemporaryFile from beets.plugins import BeetsPlugin from beets import mediafile @@ -23,7 +25,8 @@ from beets import ui from beets.ui import decargs from beets.util import syspath, normpath, displayable_path from beets.util.artresizer import ArtResizer -from beets import config +from beets import config, util + log = logging.getLogger('beets') @@ -36,12 +39,18 @@ class EmbedCoverArtPlugin(BeetsPlugin): self.config.add({ 'maxwidth': 0, 'auto': True, + 'compare_threshold': 0 }) - if self.config['maxwidth'].get(int) and \ - not ArtResizer.shared.local: + + if self.config['maxwidth'].get(int) and not ArtResizer.shared.local: self.config['maxwidth'] = 0 log.warn(u"embedart: ImageMagick or PIL not found; " u"'maxwidth' option ignored") + if self.config['compare_threshold'].get(int) and not \ + ArtResizer.shared.can_compare: + self.config['compare_threshold'] = 0 + log.warn(u"embedart: ImageMagick 6.8.7 or higher not installed; " + u"'compare_threshold' option ignored") def commands(self): # Embed command. @@ -52,12 +61,14 @@ class EmbedCoverArtPlugin(BeetsPlugin): '-f', '--file', metavar='PATH', help='the image file to embed' ) maxwidth = config['embedart']['maxwidth'].get(int) + compare_threshold = config['embedart']['compare_threshold'].get(int) def embed_func(lib, opts, args): if opts.file: imagepath = normpath(opts.file) for item in lib.items(decargs(args)): - embed_item(item, imagepath, maxwidth) + embed_item(item, imagepath, maxwidth, None, + compare_threshold) else: for album in lib.albums(decargs(args)): embed_album(album, maxwidth) @@ -72,7 +83,8 @@ class EmbedCoverArtPlugin(BeetsPlugin): def extract_func(lib, opts, args): outpath = normpath(opts.outpath or 'cover') - extract(lib, outpath, decargs(args)) + item = lib.items(decargs(args)).get() + extract(outpath, item) extract_cmd.func = extract_func # Clear command. @@ -94,10 +106,16 @@ def album_imported(lib, album): embed_album(album, config['embedart']['maxwidth'].get(int)) -def embed_item(item, imagepath, maxwidth=None, itempath=None): +def embed_item(item, imagepath, maxwidth=None, itempath=None, + compare_threshold=0): """Embed an image into the item's media file. """ + if compare_threshold: + if not check_art_similarity(item, imagepath, compare_threshold): + log.warn('Image not similar, skipping it.') + return try: + log.info(u'embedart: writing %s', displayable_path(imagepath)) item['images'] = [_mediafile_image(imagepath, maxwidth)] except IOError as exc: log.error(u'embedart: could not read image file: {0}'.format(exc)) @@ -124,7 +142,38 @@ def embed_album(album, maxwidth=None): .format(album)) for item in album.items(): - embed_item(item, imagepath, maxwidth) + embed_item(item, imagepath, maxwidth, None, + config['embedart']['compare_threshold'].get(int)) + + +def check_art_similarity(item, imagepath, compare_threshold): + """A boolean indicating if an image is similar to embedded item art. + """ + with NamedTemporaryFile(delete=True) as f: + art = extract(f.name, item) + + if art: + # Converting images to grayscale tends to minimize the weight + # of colors in the diff score + cmd = 'convert {0} {1} -colorspace gray MIFF:- | ' \ + 'compare -metric PHASH - null:'.format(syspath(imagepath), + syspath(art)) + + try: + phashDiff = util.command_output(cmd, shell=True) + except subprocess.CalledProcessError, e: + if e.returncode != 1: + log.warn(u'embedart: IM phashes compare failed for {0}, \ + {1}'.format(displayable_path(imagepath), + displayable_path(art))) + return + phashDiff = float(e.output) + + log.info(u'embedart: compare PHASH score is {0}'.format(phashDiff)) + if phashDiff > compare_threshold: + return False + + return True def _mediafile_image(image_path, maxwidth=None): @@ -142,8 +191,7 @@ def _mediafile_image(image_path, maxwidth=None): # 'extractart' command. -def extract(lib, outpath, query): - item = lib.items(query).get() +def extract(outpath, item): if not item: log.error(u'No item matches query.') return @@ -170,14 +218,15 @@ def extract(lib, outpath, query): return outpath += '.' + ext - log.info(u'Extracting album art from: {0.artist} - {0.title}\n' - u'To: {1}'.format(item, displayable_path(outpath))) + log.info(u'Extracting album art from: {0.artist} - {0.title} ' + u'to: {1}'.format(item, displayable_path(outpath))) with open(syspath(outpath), 'wb') as f: f.write(art) - + return outpath # 'clearart' command. + def clear(lib, query): log.info(u'Clearing album art from items:') for item in lib.items(query): diff --git a/docs/plugins/embedart.rst b/docs/plugins/embedart.rst index 1200071cf..c0451b720 100644 --- a/docs/plugins/embedart.rst +++ b/docs/plugins/embedart.rst @@ -19,6 +19,28 @@ embedded after each album is added to the library. This behavior can be disabled with the ``auto`` config option (see below). +.. _image-similarity-check: + +Checking image similarity before embedding +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + +When importing a lot of files with the ``auto`` option, one may be reluctant to +overwrite existing embedded art for all of them. + +It's possible to tell beets to embed fetched art only if it corresponds to a +similar image than already embedded art. This works by computing the perceptual +hashes (`PHASH`_) of the two images and checking that the difference between +the two does not exceed a given threshold. +The threshold used is given by the ``compare_threshold`` option: + +* use '0' to always embed image (disable similarity check) + +* use any positive integer to define a similarity threshold. The smaller the + value, the more similar the images must be. A value in the range [10,100] is + recommended. + +Requires `ImageMagick`_ + Manually Embedding and Extracting Art ------------------------------------- @@ -51,9 +73,17 @@ To do so, add this to your ``config.yaml``:: A maximum image width can be configured as ``maxwidth`` to downscale images before embedding them (the original image file is not altered). The resize operation reduces image width to ``maxwidth`` pixels. The height is recomputed -so that the aspect ratio is preserved. `PIL`_ or `ImageMagick`_ is required to -use the ``maxwidth`` config option. See also :ref:`image-resizing` for further +so that the aspect ratio is preserved. +Requires `ImageMagick`_ or `PIL`_, see :ref:`image-resizing` for further caveats about image resizing. +The ``compare_threshold`` option defines how similar must candidate art be +regarding to embedded art to be written to the file, see +:ref:`image-similarity-check` for more infos. +By default the option is set to '0' (candidate art is always written to file). +Requires `ImageMagick`_ + + .. _PIL: http://www.pythonware.com/products/pil/ .. _ImageMagick: http://www.imagemagick.org/ +.. _PHASH: http://www.fmwconcepts.com/misc_tests/perceptual_hash_test_results_510/ diff --git a/test/rsrc/abbey-different.jpg b/test/rsrc/abbey-different.jpg new file mode 100644 index 000000000..138c0e599 Binary files /dev/null and b/test/rsrc/abbey-different.jpg differ diff --git a/test/rsrc/abbey-similar.jpg b/test/rsrc/abbey-similar.jpg new file mode 100644 index 000000000..667cd4f0e Binary files /dev/null and b/test/rsrc/abbey-similar.jpg differ diff --git a/test/rsrc/abbey.jpg b/test/rsrc/abbey.jpg new file mode 100644 index 000000000..5bb14c47d Binary files /dev/null and b/test/rsrc/abbey.jpg differ diff --git a/test/test_embedart.py b/test/test_embedart.py index 494a81523..ee109a6f9 100644 --- a/test/test_embedart.py +++ b/test/test_embedart.py @@ -16,18 +16,41 @@ import os.path import _common from _common import unittest from helper import TestHelper, capture_log +from nose.plugins.skip import SkipTest from beets.mediafile import MediaFile +from beets import config +from beets.util import syspath +from beets.util.artresizer import ArtResizer + + +def require_artresizer_compare(test): + + def wrapper(*args, **kwargs): + if not ArtResizer.shared.can_compare: + raise SkipTest() + else: + return test(*args, **kwargs) + + wrapper.__name__ = test.__name__ + return wrapper class EmbedartCliTest(unittest.TestCase, TestHelper): - artpath = os.path.join(_common.RSRC, 'image-2x3.jpg') + small_artpath = os.path.join(_common.RSRC, 'image-2x3.jpg') + abbey_artpath = os.path.join(_common.RSRC, 'abbey.jpg') + abbey_similarpath = os.path.join(_common.RSRC, 'abbey-similar.jpg') + abbey_differentpath = os.path.join(_common.RSRC, 'abbey-different.jpg') def setUp(self): self.setup_beets() # Converter is threaded self.load_plugins('embedart') - with open(self.artpath) as f: + + def _setup_data(self, artpath=None): + if not artpath: + artpath = self.small_artpath + with open(syspath(artpath)) as f: self.image_data = f.read() def tearDown(self): @@ -35,20 +58,21 @@ class EmbedartCliTest(unittest.TestCase, TestHelper): self.teardown_beets() def test_embed_art_from_file(self): + self._setup_data() album = self.add_album_fixture() item = album.items()[0] - self.run_command('embedart', '-f', self.artpath) - mediafile = MediaFile(item.path) + self.run_command('embedart', '-f', self.small_artpath) + mediafile = MediaFile(syspath(item.path)) self.assertEqual(mediafile.images[0].data, self.image_data) def test_embed_art_from_album(self): + self._setup_data() album = self.add_album_fixture() item = album.items()[0] - - album.artpath = self.artpath + album.artpath = self.small_artpath album.store() self.run_command('embedart') - mediafile = MediaFile(item.path) + mediafile = MediaFile(syspath(item.path)) self.assertEqual(mediafile.images[0].data, self.image_data) def test_art_file_missing(self): @@ -57,6 +81,33 @@ class EmbedartCliTest(unittest.TestCase, TestHelper): self.run_command('embedart', '-f', '/doesnotexist') self.assertIn(u'embedart: could not read image file:', ''.join(logs)) + @require_artresizer_compare + def test_reject_different_art(self): + self._setup_data(self.abbey_artpath) + album = self.add_album_fixture() + item = album.items()[0] + self.run_command('embedart', '-f', self.abbey_artpath) + config['embedart']['compare_threshold'] = 20 + self.run_command('embedart', '-f', self.abbey_differentpath) + mediafile = MediaFile(syspath(item.path)) + + self.assertEqual(mediafile.images[0].data, self.image_data, + 'Image written is not {0}'.format( + self.abbey_artpath)) + + @require_artresizer_compare + def test_accept_similar_art(self): + self._setup_data(self.abbey_similarpath) + album = self.add_album_fixture() + item = album.items()[0] + self.run_command('embedart', '-f', self.abbey_artpath) + config['embedart']['compare_threshold'] = 20 + self.run_command('embedart', '-f', self.abbey_similarpath) + mediafile = MediaFile(syspath(item.path)) + + self.assertEqual(mediafile.images[0].data, self.image_data, + 'Image written is not {0}'.format( + self.abbey_similarpath)) def suite(): return unittest.TestLoader().loadTestsFromName(__name__)