diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py index a2ce1661d..4d4158ec1 100644 --- a/beetsplug/fetchart.py +++ b/beetsplug/fetchart.py @@ -29,6 +29,7 @@ from beets import importer from beets import ui from beets import util from beets import config +from beets.mediafile import _image_mime_type from beets.util.artresizer import ArtResizer from beets.util import confit from beets.util import syspath, bytestring_path @@ -230,18 +231,49 @@ class RemoteArtSource(ArtSource): with closing(self.request(candidate.url, stream=True, message=u'downloading image')) as resp: ct = resp.headers.get('Content-Type', None) - if ct not in CONTENT_TYPES: - self._log.debug( - u'not a supported image: {}', - resp.headers.get('Content-Type') or u'no content type', - ) - candidate.path = None + + # Download the image to a temporary file. As some servers + # (notably fanart.tv) have proven to return wrong Content-Types + # when images were uploaded with a bad file extension, do not + # rely on it. Instead validate the type using the file magic + # and only then determine the extension. + data = resp.iter_content(chunk_size=1024) + header = b'' + for chunk in data: + header += chunk + if len(header) >= 32: + # The imghdr module will only read 32 bytes, and our + # own additions in mediafile even less. + break + else: + # server didn't return enough data, i.e. corrupt image return - # Generate a temporary file with the correct extension. - with NamedTemporaryFile(suffix=b'.' + CONTENT_TYPES[ct][0], - delete=False) as fh: - for chunk in resp.iter_content(chunk_size=1024): + real_ct = _image_mime_type(header) + if real_ct is None: + # detection by file magic failed, fall back to the + # server-supplied Content-Type + # Is our type detection failsafe enough to drop this? + real_ct = ct + + if real_ct not in CONTENT_TYPES: + self._log.debug(u'not a supported image: {}', + real_ct or u'unknown content type') + return + + ext = b'.' + CONTENT_TYPES[real_ct][0] + + if real_ct != ct: + self._log.warn(u'Server specified {}, but returned a ' + u'{} image. Correcting the extension ' + u'to {}', + ct, real_ct, ext) + + with NamedTemporaryFile(suffix=ext, delete=False) as fh: + # write the first already loaded part of the image + fh.write(header) + # download the remaining part of the image + for chunk in data: fh.write(chunk) self._log.debug(u'downloaded art to: {0}', util.displayable_path(fh.name)) @@ -252,7 +284,6 @@ class RemoteArtSource(ArtSource): # Handling TypeError works around a urllib3 bug: # https://github.com/shazow/urllib3/issues/556 self._log.debug(u'error fetching art: {}', exc) - candidate.path = None return diff --git a/docs/changelog.rst b/docs/changelog.rst index 4cd44372d..b01af1495 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -40,9 +40,9 @@ Fixes: * Fix a crash when specifying non-ASCII format strings on the command line with the ``-f`` option to many commands. :bug:`2063` * :doc:`/plugins/fetchart`: Determine the file extension for downloaded images - based on the ``Content-Type`` header instead of hardcoding it to .jpg. - Reported in :bug:`2053`, which for now it does not fix due to - server-side issues, though. + based on the image's magic bytes and warn if result is not consistent with + the server-supplied ``Content-Type`` header instead of + hardcoding it to .jpg. :bug:`2053` 1.3.18 (May 31, 2016) --------------------- diff --git a/test/test_art.py b/test/test_art.py index 58b15d8e5..8659cf12a 100644 --- a/test/test_art.py +++ b/test/test_art.py @@ -45,16 +45,29 @@ class UseThePlugin(_common.TestCase): self.plugin = fetchart.FetchArtPlugin() -class FetchImageTest(UseThePlugin): - URL = 'http://example.com/test.jpg' - +class FetchImageHelper(_common.TestCase): + """Helper mixin for mocking requests when fetching images + with remote art sources. + """ @responses.activate def run(self, *args, **kwargs): - super(FetchImageTest, self).run(*args, **kwargs) + super(FetchImageHelper, self).run(*args, **kwargs) - def mock_response(self, content_type): - responses.add(responses.GET, self.URL, - content_type=content_type) + IMAGEHEADER = {'image/jpeg': b'\x00' * 6 + b'JFIF', + 'image/png': b'\211PNG\r\n\032\n', } + + def mock_response(self, url, content_type='image/jpeg', file_type=None): + if file_type is None: + file_type = content_type + responses.add(responses.GET, url, + content_type=content_type, + # imghdr reads 32 bytes + body=self.IMAGEHEADER.get( + file_type, b'').ljust(32, b'\x00')) + + +class FetchImageTest(FetchImageHelper, UseThePlugin): + URL = 'http://example.com/test.jpg' def setUp(self): super(FetchImageTest, self).setUp() @@ -64,17 +77,23 @@ class FetchImageTest(UseThePlugin): self.candidate = fetchart.Candidate(logger, url=self.URL) def test_invalid_type_returns_none(self): - self.mock_response('image/watercolour') + self.mock_response(self.URL, 'image/watercolour') self.source.fetch_image(self.candidate, self.extra) self.assertEqual(self.candidate.path, None) def test_jpeg_type_returns_path(self): - self.mock_response('image/jpeg') + self.mock_response(self.URL, 'image/jpeg') self.source.fetch_image(self.candidate, self.extra) self.assertNotEqual(self.candidate.path, None) def test_extension_set_by_content_type(self): - self.mock_response('image/png') + self.mock_response(self.URL, 'image/png') + self.source.fetch_image(self.candidate, self.extra) + self.assertEqual(os.path.splitext(self.candidate.path)[1], b'.png') + self.assertExists(self.candidate.path) + + def test_does_not_rely_on_server_content_type(self): + self.mock_response(self.URL, 'image/jpeg', 'image/png') self.source.fetch_image(self.candidate, self.extra) self.assertEqual(os.path.splitext(self.candidate.path)[1], b'.png') self.assertExists(self.candidate.path) @@ -128,7 +147,7 @@ class FSArtTest(UseThePlugin): self.assertEqual(candidates, paths) -class CombinedTest(UseThePlugin): +class CombinedTest(FetchImageHelper, UseThePlugin): ASIN = 'xxxx' MBID = 'releaseid' AMAZON_URL = 'http://images.amazon.com/images/P/{0}.01.LZZZZZZZ.jpg' \ @@ -143,13 +162,6 @@ class CombinedTest(UseThePlugin): self.dpath = os.path.join(self.temp_dir, b'arttest') os.mkdir(self.dpath) - @responses.activate - def run(self, *args, **kwargs): - super(CombinedTest, self).run(*args, **kwargs) - - def mock_response(self, url, content_type='image/jpeg'): - responses.add(responses.GET, url, content_type=content_type) - def test_main_interface_returns_amazon_art(self): self.mock_response(self.AMAZON_URL) album = _common.Bag(asin=self.ASIN)