fetchart: improve error resilience of the Content-Type detection by file magic

This commit is contained in:
wordofglass 2016-06-25 13:43:20 +02:00
parent 9968288358
commit 697291b04e
2 changed files with 38 additions and 32 deletions

View file

@ -238,42 +238,47 @@ class RemoteArtSource(ArtSource):
# rely on it. Instead validate the type using the file magic
# and only then determine the extension.
data = resp.iter_content(chunk_size=1024)
try:
# stream only a small part of the image to get its header
chunk = next(data)
except StopIteration:
pass
header = b''
for chunk in data:
header += chunk
if len(header) >= 32:
# The imghdr module will only read 32 bytes, and our
# own additions in mediafile even less.
break
else:
real_ct = _image_mime_type(chunk)
if real_ct is None:
# detection by file magic failed, fall back to the
# server-supplied Content-Type
# Is our type detection failsafe enough to drop this?
real_ct = ct
# server didn't return enough data, i.e. corrupt image
return
if real_ct not in CONTENT_TYPES:
self._log.debug(u'not a supported image: {}',
real_ct or u'unknown content type')
candidate.path = None
return
real_ct = _image_mime_type(header)
if real_ct is None:
# detection by file magic failed, fall back to the
# server-supplied Content-Type
# Is our type detection failsafe enough to drop this?
real_ct = ct
ext = b'.' + CONTENT_TYPES[real_ct][0]
if real_ct != ct:
self._log.warn(u'Server specified {}, but returned a '
u'{} image. Correcting the extension '
u'to {}',
ct, real_ct, ext)
if real_ct not in CONTENT_TYPES:
self._log.debug(u'not a supported image: {}',
real_ct or u'unknown content type')
candidate.path = None
return
with NamedTemporaryFile(suffix=ext, delete=False) as fh:
# write the first already loaded part of the image
ext = b'.' + CONTENT_TYPES[real_ct][0]
if real_ct != ct:
self._log.warn(u'Server specified {}, but returned a '
u'{} image. Correcting the extension '
u'to {}',
ct, real_ct, ext)
with NamedTemporaryFile(suffix=ext, delete=False) as fh:
# write the first already loaded part of the image
fh.write(header)
# download the remaining part of the image
for chunk in data:
fh.write(chunk)
# download the remaining part of the image
for chunk in data:
fh.write(chunk)
self._log.debug(u'downloaded art to: {0}',
util.displayable_path(fh.name))
candidate.path = util.bytestring_path(fh.name)
self._log.debug(u'downloaded art to: {0}',
util.displayable_path(fh.name))
candidate.path = util.bytestring_path(fh.name)
return
except (IOError, requests.RequestException, TypeError) as exc:

View file

@ -60,7 +60,8 @@ class FetchImageHelper(_common.TestCase):
file_type = content_type
responses.add(responses.GET, url,
content_type=content_type,
body=IMAGEHEADER.get(file_type, b'\x00' * 32))
# imghdr reads 32 bytes
body=IMAGEHEADER.get(file_type, b'').ljust(32, b'\x00'))
class FetchImageTest(FetchImageHelper, UseThePlugin):