diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py index badac9d79..4a94ca643 100644 --- a/beetsplug/fetchart.py +++ b/beetsplug/fetchart.py @@ -38,7 +38,7 @@ except ImportError: HAVE_ITUNES = False IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg'] -CONTENT_TYPES = ('image/jpeg',) +CONTENT_TYPES = ('image/jpeg', 'image/gif') DOWNLOAD_EXTENSION = '.jpg' requests_session = requests.Session() @@ -171,13 +171,16 @@ class Wikipedia(ArtSource): PREFIX dbpprop: PREFIX owl: PREFIX rdfs: - SELECT DISTINCT ?coverFilename WHERE {{ + PREFIX foaf: + + SELECT DISTINCT ?pageId ?coverFilename WHERE {{ + ?subject owl:wikiPageID ?pageId . ?subject dbpprop:name ?name . ?subject rdfs:label ?label . {{ ?subject dbpprop:artist ?artist }} UNION {{ ?subject owl:artist ?artist }} - {{ ?artist rdfs:label "{artist}"@en }} + {{ ?artist foaf:name "{artist}"@en }} UNION {{ ?artist dbpprop:name "{artist}"@en }} ?subject rdf:type . @@ -191,29 +194,62 @@ class Wikipedia(ArtSource): return # Find the name of the cover art filename on DBpedia - cover_filename = None + cover_filename, page_id = None, None dbpedia_response = requests.get( self.DBPEDIA_URL, params={ 'format': 'application/sparql-results+json', 'timeout': 2500, - 'query': self.SPARQL_QUERY.format(artist=album.albumartist, - album=album.album) + 'query': self.SPARQL_QUERY.format( + artist=album.albumartist.title(), album=album.album) }, headers={'content-type': 'application/json'}) try: data = dbpedia_response.json() results = data['results']['bindings'] if results: - cover_filename = results[0]['coverFilename']['value'] + cover_filename = 'File:' + results[0]['coverFilename']['value'] + page_id = results[0]['pageId']['value'] else: self._log.debug(u'album not found on dbpedia') - except: + except (ValueError, KeyError, IndexError): self._log.debug(u'error scraping dbpedia album page') # Ensure we have a filename before attempting to query wikipedia - if not cover_filename: + if not (cover_filename and page_id): return + # DBPedia 
sometimes provides an incomplete cover_filename, indicated + # by the filename having a space before the extension, e.g., 'foo .bar' + # An additional Wikipedia call can help to find the real filename. + # This may be removed once the DBPedia issue is resolved, see: + # https://github.com/dbpedia/extraction-framework/issues/396 + if '.' not in cover_filename.split(' .')[-1]: + self._log.debug(u'dbpedia provided incomplete cover_filename') + lpart, rpart = cover_filename.rsplit(' .', 1) + + # Query all the images in the page + wikipedia_response = requests.get(self.WIKIPEDIA_URL, params={ + 'format': 'json', + 'action': 'query', + 'continue': '', + 'prop': 'images', + 'pageids': page_id}, + headers={'content-type': 'application/json'}) + + # Try to see if one of the images on the pages matches our + # incomplete cover_filename + try: + data = wikipedia_response.json() + results = data['query']['pages'][page_id]['images'] + for result in results: + if re.match(re.escape(lpart) + r'.*?\.' + re.escape(rpart), + result['title']): + cover_filename = result['title'] + break + except (ValueError, KeyError): + self._log.debug(u'failed to retrieve a cover_filename') + return + # Find the absolute url of the cover art on Wikipedia wikipedia_response = requests.get(self.WIKIPEDIA_URL, params={ 'format': 'json', @@ -221,15 +257,16 @@ class Wikipedia(ArtSource): 'continue': '', 'prop': 'imageinfo', 'iiprop': 'url', - 'titles': ('File:' + cover_filename).encode('utf-8')}, + 'titles': cover_filename.encode('utf-8')}, headers={'content-type': 'application/json'}) + try: data = wikipedia_response.json() results = data['query']['pages'] for _, result in results.iteritems(): image_url = result['imageinfo'][0]['url'] yield image_url - except: + except (ValueError, KeyError, IndexError): self._log.debug(u'error scraping wikipedia imageinfo') return diff --git a/docs/changelog.rst b/docs/changelog.rst index eb80cddbc..487c27ad9 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ 
-27,6 +27,8 @@ Fixes: * Fix sorting by paths when case-insensitive. :bug:`1451` * :doc:`/plugins/embedart`: Avoid an error when trying to embed invalid images into MPEG-4 files. +* :doc:`/plugins/fetchart`: The Wikipedia source now copes better with + artists whose names use unusual capitalization (e.g., alt-J, dEUS). 1.3.13 (April 24, 2015)