fetchart: improve error resilience of the Content-Type detection by file magic

2026-01-03 22:42:44 +01:00 · 2016-06-25 13:43:20 +02:00 · 2016-06-25 13:43:20 +02:00 · 697291b04e
commit 697291b04e
parent 9968288358
2 changed files with 38 additions and 32 deletions
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -238,42 +238,47 @@ class RemoteArtSource(ArtSource):
                # rely on it. Instead validate the type using the file magic
                # and only then determine the extension.
                data = resp.iter_content(chunk_size=1024)
-                try:
-                    # stream only a small part of the image to get its header
-                    chunk = next(data)
-                except StopIteration:
-                    pass
+                header = b''
+                for chunk in data:
+                    header += chunk
+                    if len(header) >= 32:
+                        # The imghdr module reads only 32 bytes, and our own
+                        # additions in mediafile read even fewer.
+                        break
                else:
-                    real_ct = _image_mime_type(chunk)
-                    if real_ct is None:
-                        # detection by file magic failed, fall back to the
-                        # server-supplied Content-Type
-                        # Is our type detection failsafe enough to drop this?
-                        real_ct = ct
+                    # server didn't return enough data, i.e. corrupt image
+                    return

-                    if real_ct not in CONTENT_TYPES:
-                        self._log.debug(u'not a supported image: {}',
-                                        real_ct or u'unknown content type')
-                        candidate.path = None
-                        return
+                real_ct = _image_mime_type(header)
+                if real_ct is None:
+                    # detection by file magic failed, fall back to the
+                    # server-supplied Content-Type
+                    # Is our type detection failsafe enough to drop this?
+                    real_ct = ct

-                    ext = b'.' + CONTENT_TYPES[real_ct][0]
-                    if real_ct != ct:
-                        self._log.warn(u'Server specified {}, but returned a '
-                                       u'{} image. Correcting the extension '
-                                       u'to {}',
-                                       ct, real_ct, ext)
+                if real_ct not in CONTENT_TYPES:
+                    self._log.debug(u'not a supported image: {}',
+                                    real_ct or u'unknown content type')
+                    candidate.path = None
+                    return

-                    with NamedTemporaryFile(suffix=ext, delete=False) as fh:
-                        # write the first already loaded part of the image
+                ext = b'.' + CONTENT_TYPES[real_ct][0]
+
+                if real_ct != ct:
+                    self._log.warn(u'Server specified {}, but returned a '
+                                   u'{} image. Correcting the extension '
+                                   u'to {}',
+                                   ct, real_ct, ext)
+
+                with NamedTemporaryFile(suffix=ext, delete=False) as fh:
+                    # write the first already loaded part of the image
+                    fh.write(header)
+                    # download the remaining part of the image
+                    for chunk in data:
                        fh.write(chunk)
-                        # download the remaining part of the image
-                        for chunk in data:
-                            fh.write(chunk)
-                    self._log.debug(u'downloaded art to: {0}',
-                                    util.displayable_path(fh.name))
-                    candidate.path = util.bytestring_path(fh.name)
-
+                self._log.debug(u'downloaded art to: {0}',
+                                util.displayable_path(fh.name))
+                candidate.path = util.bytestring_path(fh.name)
                return

        except (IOError, requests.RequestException, TypeError) as exc:
--- a/test/test_art.py
+++ b/test/test_art.py
@@ -60,7 +60,8 @@ class FetchImageHelper(_common.TestCase):
            file_type = content_type
        responses.add(responses.GET, url,
                      content_type=content_type,
-                      body=IMAGEHEADER.get(file_type, b'\x00' * 32))
+                      # imghdr reads 32 bytes
+                      body=IMAGEHEADER.get(file_type, b'').ljust(32, b'\x00'))


 class FetchImageTest(FetchImageHelper, UseThePlugin):