Merge pull request #2072 from beetbox/art_rectify_extension_via_imghdr

Rectify artwork extension via imghdr
This commit is contained in:
wordofglass 2016-06-25 22:06:32 +02:00 committed by GitHub
commit 86ed8e9650
3 changed files with 75 additions and 32 deletions

View file

@ -29,6 +29,7 @@ from beets import importer
from beets import ui
from beets import util
from beets import config
from beets.mediafile import _image_mime_type
from beets.util.artresizer import ArtResizer
from beets.util import confit
from beets.util import syspath, bytestring_path
@ -230,18 +231,49 @@ class RemoteArtSource(ArtSource):
with closing(self.request(candidate.url, stream=True,
message=u'downloading image')) as resp:
ct = resp.headers.get('Content-Type', None)
if ct not in CONTENT_TYPES:
self._log.debug(
u'not a supported image: {}',
resp.headers.get('Content-Type') or u'no content type',
)
candidate.path = None
# Download the image to a temporary file. As some servers
# (notably fanart.tv) have proven to return wrong Content-Types
# when images were uploaded with a bad file extension, do not
# rely on it. Instead validate the type using the file magic
# and only then determine the extension.
data = resp.iter_content(chunk_size=1024)
header = b''
for chunk in data:
header += chunk
if len(header) >= 32:
# The imghdr module will only read 32 bytes, and our
# own additions in mediafile even less.
break
else:
# server didn't return enough data, i.e. corrupt image
return
# Generate a temporary file with the correct extension.
with NamedTemporaryFile(suffix=b'.' + CONTENT_TYPES[ct][0],
delete=False) as fh:
for chunk in resp.iter_content(chunk_size=1024):
real_ct = _image_mime_type(header)
if real_ct is None:
# detection by file magic failed, fall back to the
# server-supplied Content-Type
# Is our type detection failsafe enough to drop this?
real_ct = ct
if real_ct not in CONTENT_TYPES:
self._log.debug(u'not a supported image: {}',
real_ct or u'unknown content type')
return
ext = b'.' + CONTENT_TYPES[real_ct][0]
if real_ct != ct:
self._log.warn(u'Server specified {}, but returned a '
u'{} image. Correcting the extension '
u'to {}',
ct, real_ct, ext)
with NamedTemporaryFile(suffix=ext, delete=False) as fh:
# write the first already loaded part of the image
fh.write(header)
# download the remaining part of the image
for chunk in data:
fh.write(chunk)
self._log.debug(u'downloaded art to: {0}',
util.displayable_path(fh.name))
@ -252,7 +284,6 @@ class RemoteArtSource(ArtSource):
# Handling TypeError works around a urllib3 bug:
# https://github.com/shazow/urllib3/issues/556
self._log.debug(u'error fetching art: {}', exc)
candidate.path = None
return

View file

@ -40,9 +40,9 @@ Fixes:
* Fix a crash when specifying non-ASCII format strings on the command line
with the ``-f`` option to many commands. :bug:`2063`
* :doc:`/plugins/fetchart`: Determine the file extension for downloaded images
based on the ``Content-Type`` header instead of hardcoding it to .jpg.
Reported in :bug:`2053`, which for now it does not fix due to
server-side issues, though.
based on the image's magic bytes and warn if result is not consistent with
the server-supplied ``Content-Type`` header instead of
hardcoding it to .jpg. :bug:`2053`
1.3.18 (May 31, 2016)
---------------------

View file

@ -45,16 +45,29 @@ class UseThePlugin(_common.TestCase):
self.plugin = fetchart.FetchArtPlugin()
class FetchImageTest(UseThePlugin):
URL = 'http://example.com/test.jpg'
class FetchImageHelper(_common.TestCase):
"""Helper mixin for mocking requests when fetching images
with remote art sources.
"""
@responses.activate
def run(self, *args, **kwargs):
super(FetchImageTest, self).run(*args, **kwargs)
super(FetchImageHelper, self).run(*args, **kwargs)
def mock_response(self, content_type):
responses.add(responses.GET, self.URL,
content_type=content_type)
IMAGEHEADER = {'image/jpeg': b'\x00' * 6 + b'JFIF',
'image/png': b'\211PNG\r\n\032\n', }
def mock_response(self, url, content_type='image/jpeg', file_type=None):
if file_type is None:
file_type = content_type
responses.add(responses.GET, url,
content_type=content_type,
# imghdr reads 32 bytes
body=self.IMAGEHEADER.get(
file_type, b'').ljust(32, b'\x00'))
class FetchImageTest(FetchImageHelper, UseThePlugin):
URL = 'http://example.com/test.jpg'
def setUp(self):
super(FetchImageTest, self).setUp()
@ -64,17 +77,23 @@ class FetchImageTest(UseThePlugin):
self.candidate = fetchart.Candidate(logger, url=self.URL)
def test_invalid_type_returns_none(self):
self.mock_response('image/watercolour')
self.mock_response(self.URL, 'image/watercolour')
self.source.fetch_image(self.candidate, self.extra)
self.assertEqual(self.candidate.path, None)
def test_jpeg_type_returns_path(self):
self.mock_response('image/jpeg')
self.mock_response(self.URL, 'image/jpeg')
self.source.fetch_image(self.candidate, self.extra)
self.assertNotEqual(self.candidate.path, None)
def test_extension_set_by_content_type(self):
self.mock_response('image/png')
self.mock_response(self.URL, 'image/png')
self.source.fetch_image(self.candidate, self.extra)
self.assertEqual(os.path.splitext(self.candidate.path)[1], b'.png')
self.assertExists(self.candidate.path)
def test_does_not_rely_on_server_content_type(self):
self.mock_response(self.URL, 'image/jpeg', 'image/png')
self.source.fetch_image(self.candidate, self.extra)
self.assertEqual(os.path.splitext(self.candidate.path)[1], b'.png')
self.assertExists(self.candidate.path)
@ -128,7 +147,7 @@ class FSArtTest(UseThePlugin):
self.assertEqual(candidates, paths)
class CombinedTest(UseThePlugin):
class CombinedTest(FetchImageHelper, UseThePlugin):
ASIN = 'xxxx'
MBID = 'releaseid'
AMAZON_URL = 'http://images.amazon.com/images/P/{0}.01.LZZZZZZZ.jpg' \
@ -143,13 +162,6 @@ class CombinedTest(UseThePlugin):
self.dpath = os.path.join(self.temp_dir, b'arttest')
os.mkdir(self.dpath)
@responses.activate
def run(self, *args, **kwargs):
super(CombinedTest, self).run(*args, **kwargs)
def mock_response(self, url, content_type='image/jpeg'):
responses.add(responses.GET, url, content_type=content_type)
def test_main_interface_returns_amazon_art(self):
self.mock_response(self.AMAZON_URL)
album = _common.Bag(asin=self.ASIN)