diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py
index badac9d79..4a94ca643 100644
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -38,7 +38,7 @@ except ImportError:
HAVE_ITUNES = False
IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg']
-CONTENT_TYPES = ('image/jpeg',)
+CONTENT_TYPES = ('image/jpeg', 'image/gif')
DOWNLOAD_EXTENSION = '.jpg'
requests_session = requests.Session()
@@ -171,13 +171,16 @@ class Wikipedia(ArtSource):
PREFIX dbpprop:
PREFIX owl:
PREFIX rdfs:
- SELECT DISTINCT ?coverFilename WHERE {{
+ PREFIX foaf:
+
+ SELECT DISTINCT ?pageId ?coverFilename WHERE {{
+ ?subject owl:wikiPageID ?pageId .
?subject dbpprop:name ?name .
?subject rdfs:label ?label .
{{ ?subject dbpprop:artist ?artist }}
UNION
{{ ?subject owl:artist ?artist }}
- {{ ?artist rdfs:label "{artist}"@en }}
+ {{ ?artist foaf:name "{artist}"@en }}
UNION
{{ ?artist dbpprop:name "{artist}"@en }}
?subject rdf:type .
@@ -191,29 +194,62 @@ class Wikipedia(ArtSource):
return
# Find the name of the cover art filename on DBpedia
- cover_filename = None
+ cover_filename, page_id = None, None
dbpedia_response = requests.get(
self.DBPEDIA_URL,
params={
'format': 'application/sparql-results+json',
'timeout': 2500,
- 'query': self.SPARQL_QUERY.format(artist=album.albumartist,
- album=album.album)
+ 'query': self.SPARQL_QUERY.format(
+ artist=album.albumartist.title(), album=album.album)
}, headers={'content-type': 'application/json'})
try:
data = dbpedia_response.json()
results = data['results']['bindings']
if results:
- cover_filename = results[0]['coverFilename']['value']
+ cover_filename = 'File:' + results[0]['coverFilename']['value']
+ page_id = results[0]['pageId']['value']
else:
self._log.debug(u'album not found on dbpedia')
- except:
+ except (ValueError, KeyError, IndexError):
self._log.debug(u'error scraping dbpedia album page')
# Ensure we have a filename before attempting to query wikipedia
- if not cover_filename:
+ if not (cover_filename and page_id):
return
+ # DBPedia sometimes provides an incomplete cover_filename, indicated
+ # by the filename having a space before the extension, e.g., 'foo .bar'
+ # An additional Wikipedia call can help to find the real filename.
+ # This may be removed once the DBPedia issue is resolved, see:
+ # https://github.com/dbpedia/extraction-framework/issues/396
+ if '.' not in cover_filename.split(' .')[-1]:
+ self._log.debug(u'dbpedia provided incomplete cover_filename')
+ lpart, rpart = cover_filename.rsplit(' .', 1)
+
+ # Query all the images in the page
+ wikipedia_response = requests.get(self.WIKIPEDIA_URL, params={
+ 'format': 'json',
+ 'action': 'query',
+ 'continue': '',
+ 'prop': 'images',
+ 'pageids': page_id},
+ headers={'content-type': 'application/json'})
+
+ # Try to see if one of the images on the page matches our
+ # incomplete cover_filename
+ try:
+ data = wikipedia_response.json()
+ results = data['query']['pages'][page_id]['images']
+ for result in results:
+ if re.match(re.escape(lpart) + r'.*?\.' + re.escape(rpart),
+ result['title']):
+ cover_filename = result['title']
+ break
+ except (ValueError, KeyError):
+ self._log.debug(u'failed to retrieve a cover_filename')
+ return
+
# Find the absolute url of the cover art on Wikipedia
wikipedia_response = requests.get(self.WIKIPEDIA_URL, params={
'format': 'json',
@@ -221,15 +257,16 @@ class Wikipedia(ArtSource):
'continue': '',
'prop': 'imageinfo',
'iiprop': 'url',
- 'titles': ('File:' + cover_filename).encode('utf-8')},
+ 'titles': cover_filename.encode('utf-8')},
headers={'content-type': 'application/json'})
+
try:
data = wikipedia_response.json()
results = data['query']['pages']
for _, result in results.iteritems():
image_url = result['imageinfo'][0]['url']
yield image_url
- except:
+ except (ValueError, KeyError, IndexError):
self._log.debug(u'error scraping wikipedia imageinfo')
return
diff --git a/docs/changelog.rst b/docs/changelog.rst
index eb80cddbc..487c27ad9 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -27,6 +27,8 @@ Fixes:
* Fix sorting by paths when case-insensitive. :bug:`1451`
* :doc:`/plugins/embedart`: Avoid an error when trying to embed invalid images
into MPEG-4 files.
+* :doc:`/plugins/fetchart`: The Wikipedia source now copes better with
+ artist names that use unusual capitalization (e.g., alt-J, dEUS).
1.3.13 (April 24, 2015)