Merge pull request #1194 from tomjaspers/master

Fetchart: add fetching artwork from Wikipedia
2026-01-24 17:12:07 +01:00 · 2015-01-25 12:59:13 -08:00 · 2015-01-25 12:59:13 -08:00 · ec2e9891c3
commit ec2e9891c3
parent 9fe0eab26f 1a799bb77f
2 changed files with 76 additions and 3 deletions
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@ -158,6 +158,77 @@ class ITunesStore(ArtSource):
            self._log.debug(u'album not found in iTunes Store')


+class Wikipedia(ArtSource):
+    """Art from Wikipedia (queried through DBpedia).
+
+    The cover's file name is looked up on DBpedia with a SPARQL query and
+    then resolved to an absolute image URL through the Wikipedia API.
+    """
+    DBPEDIA_URL = 'http://dbpedia.org/sparql'
+    WIKIPEDIA_URL = 'http://en.wikipedia.org/w/api.php'
+    # Template for str.format(): `{artist}` and `{album}` are substituted,
+    # while the doubled braces become the literal braces of the query.
+    SPARQL_QUERY = '''PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+                 PREFIX dbpprop: <http://dbpedia.org/property/>
+                 PREFIX owl: <http://dbpedia.org/ontology/>
+                 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+                 SELECT DISTINCT ?coverFilename WHERE {{
+                   ?subject dbpprop:name ?name .
+                   ?subject rdfs:label ?label .
+                   {{ ?subject dbpprop:artist ?artist }}
+                     UNION
+                   {{ ?subject owl:artist ?artist }}
+                   {{ ?artist rdfs:label "{artist}"@en }}
+                     UNION
+                   {{ ?artist dbpprop:name "{artist}"@en }}
+                   ?subject rdf:type <http://dbpedia.org/ontology/Album> .
+                   ?subject dbpprop:cover ?coverFilename .
+                   FILTER ( regex(?name, "{album}", "i") )
+                  }}
+                 Limit 1'''
+
+    @staticmethod
+    def _sparql_quote(text):
+        """Escape `text` for use inside a double-quoted SPARQL literal.
+
+        Only backslashes and double quotes are escaped, so that a name
+        such as `"Heroes"` no longer terminates the literal early.
+        """
+        return text.replace('\\', '\\\\').replace('"', '\\"')
+
+    def get(self, album):
+        """Yield URLs of cover art for `album` found via Wikipedia.
+
+        Nothing is yielded when the album lacks an album artist or a
+        title, when either web request fails, or when no cover file name
+        is found on DBpedia.
+        """
+        if not (album.albumartist and album.album):
+            return
+
+        # Find the name of the cover art filename on DBpedia
+        # NOTE: the album title is still interpreted as a regular
+        # expression by the query's FILTER clause; only the string-literal
+        # quoting is handled here.
+        query = self.SPARQL_QUERY.format(
+            artist=self._sparql_quote(album.albumartist),
+            album=self._sparql_quote(album.album))
+        try:
+            dbpedia_response = requests.get(
+                self.DBPEDIA_URL,
+                params={
+                    'format': 'application/sparql-results+json',
+                    # Sent to the endpoint as a query-string parameter;
+                    # this is not a client-side timeout for `requests`.
+                    'timeout': 2500,
+                    'query': query,
+                }, headers={'content-type': 'application/json'})
+        except requests.RequestException:
+            self._log.debug(u'error connecting to dbpedia')
+            return
+
+        cover_filename = None
+        try:
+            data = dbpedia_response.json()
+            results = data['results']['bindings']
+            if results:
+                cover_filename = results[0]['coverFilename']['value']
+            else:
+                self._log.debug(u'album not found on dbpedia')
+        except (ValueError, KeyError, IndexError, TypeError):
+            # Invalid JSON or a response without the expected structure.
+            self._log.debug(u'error scraping dbpedia album page')
+
+        # Ensure we have a filename before attempting to query wikipedia
+        if not cover_filename:
+            return
+
+        # Find the absolute url of the cover art on Wikipedia
+        try:
+            wikipedia_response = requests.get(self.WIKIPEDIA_URL, params={
+                'format': 'json',
+                'action': 'query',
+                'continue': '',
+                'prop': 'imageinfo',
+                'iiprop': 'url',
+                'titles': ('File:' + cover_filename).encode('utf-8')},
+                headers={'content-type': 'application/json'})
+        except requests.RequestException:
+            self._log.debug(u'error connecting to wikipedia')
+            return
+
+        # Collect the URLs first and yield outside the `try`: a handler
+        # wrapped around `yield` would also intercept GeneratorExit (raised
+        # when the consumer stops early) and report it as a scraping error.
+        image_urls = []
+        try:
+            data = wikipedia_response.json()
+            for result in data['query']['pages'].values():
+                image_urls.append(result['imageinfo'][0]['url'])
+        except (ValueError, KeyError, IndexError, TypeError, AttributeError):
+            self._log.debug(u'error scraping wikipedia imageinfo')
+
+        for image_url in image_urls:
+            yield image_url
+
+
 class FileSystem(ArtSource):
    """Art from the filesystem"""
    @staticmethod
@ -203,7 +274,8 @@ class FileSystem(ArtSource):

 # Try each source in turn.

-SOURCES_ALL = [u'coverart', u'itunes', u'amazon', u'albumart', u'google']
+SOURCES_ALL = [u'coverart', u'itunes', u'amazon', u'albumart', u'google',
+               u'wikipedia']

 ART_FUNCS = {
    u'coverart': CoverArtArchive,
@ -211,6 +283,7 @@ ART_FUNCS = {
    u'albumart': AlbumArtOrg,
    u'amazon': Amazon,
    u'google': GoogleImages,
+    u'wikipedia': Wikipedia,
 }

 # PLUGIN LOGIC ###############################################################
--- a/docs/plugins/fetchart.rst
+++ b/docs/plugins/fetchart.rst
@ -47,7 +47,7 @@ file. The available options are:
  found in the filesystem.
 - **sources**: List of sources to search for images. An asterisk `*` expands
  to all available sources.
-  Default: ``coverart itunes albumart amazon google``, i.e., all sources
+  Default: ``coverart itunes amazon albumart google wikipedia``, i.e., all sources

 Here's an example that makes plugin select only images that contain *front* or
 *back* keywords in their filenames and prioritizes the iTunes source over
@ -97,7 +97,7 @@ Album Art Sources

 By default, this plugin searches for art in the local filesystem as well as on
 the Cover Art Archive, the iTunes Store, Amazon, AlbumArt.org,
-and Google Image Search, in that order. You can reorder the sources or remove
+Google Image Search, and Wikipedia, in that order. You can reorder the sources or remove
 some to speed up the process using the ``sources`` configuration option.

 When looking for local album art, beets checks for image files located in the