fetchart: add 'sources' option

Speed up cover art search by enabling selection and prioritisation of the art sources.
Fabrice Laporte 2014-11-08 23:20:44 +01:00
parent 701ed2dae9
commit 1b694e569e
3 changed files with 70 additions and 40 deletions

View file

@@ -79,17 +79,14 @@ CAA_URL = 'http://coverartarchive.org/release/{mbid}/front-500.jpg'
CAA_GROUP_URL = 'http://coverartarchive.org/release-group/{mbid}/front-500.jpg'
def caa_art(release_id):
    """Return the Cover Art Archive URL given a MusicBrainz release ID.
def caa_art(album):
    """Return the Cover Art Archive and Cover Art Archive release group URLs
    using the album's MusicBrainz release ID and release group ID.
    """
    return CAA_URL.format(mbid=release_id)
def caa_group_art(release_group_id):
    """Return the Cover Art Archive release group URL given a MusicBrainz
    release group ID.
    """
    return CAA_GROUP_URL.format(mbid=release_group_id)
    if album.mb_albumid:
        yield CAA_URL.format(mbid=album.mb_albumid)
    if album.mb_releasegroupid:
        yield CAA_GROUP_URL.format(mbid=album.mb_releasegroupid)
# Art from Amazon.
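Every art source now shares one interface: it takes the Album object and yields zero or more candidate URLs. Below is a minimal sketch of that behaviour, assuming beets with this patch applied is importable; FakeAlbum is a hypothetical stand-in for beets' Album model and the MBID value is a placeholder.

    from beetsplug import fetchart

    class FakeAlbum(object):
        # Hypothetical stand-in: only the fields read by the Cover Art
        # Archive source are provided.
        mb_albumid = 'SOME-RELEASE-MBID'
        mb_releasegroupid = None

    # caa_art() yields one URL per MusicBrainz ID that is actually set.
    print(list(fetchart.caa_art(FakeAlbum())))
    # ['http://coverartarchive.org/release/SOME-RELEASE-MBID/front-500.jpg']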
@@ -98,10 +95,12 @@ AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg'
AMAZON_INDICES = (1, 2)
def art_for_asin(asin):
    """Generate URLs for an Amazon ID (ASIN) string."""
    for index in AMAZON_INDICES:
        yield AMAZON_URL % (asin, index)
def art_for_asin(album):
    """Generate URLs using Amazon ID (ASIN) string.
    """
    if album.asin:
        for index in AMAZON_INDICES:
            yield AMAZON_URL % (album.asin, index)
# AlbumArt.org scraper.
@@ -110,11 +109,14 @@ AAO_URL = 'http://www.albumart.org/index_detail.php'
AAO_PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"'
def aao_art(asin):
    """Return art URL from AlbumArt.org given an ASIN."""
def aao_art(album):
    """Return art URL from AlbumArt.org using album ASIN.
    """
    if not album.asin:
        return
    # Get the page from albumart.org.
    try:
        resp = requests_session.get(AAO_URL, params={'asin': asin})
        resp = requests_session.get(AAO_URL, params={'asin': album.asin})
        log.debug(u'fetchart: scraped art URL: {0}'.format(resp.url))
    except requests.RequestException:
        log.debug(u'fetchart: error scraping art page')
@@ -188,7 +190,8 @@ def filename_priority(filename, cover_names):
def art_in_path(path, cover_names, cautious):
    """Look for album art files in a specified directory."""
    """Look for album art files in a specified directory.
    """
    if not os.path.isdir(path):
        return
@@ -219,6 +222,14 @@ def art_in_path(path, cover_names, cautious):
# Try each source in turn.
ART_FUNCS = {
    'coverart': caa_art,
    'itunes': itunes_art,
    'albumart': aao_art,
    'amazon': art_for_asin,
    'google': google_art,
}
def _source_urls(album):
    """Generate possible source URLs for an album's art. The URLs are
@@ -227,28 +238,11 @@ def _source_urls(album):
    through this sequence early to avoid the cost of scraping when not
    necessary.
    """
    # Cover Art Archive.
    if album.mb_albumid:
        yield caa_art(album.mb_albumid)
    if album.mb_releasegroupid:
        yield caa_group_art(album.mb_releasegroupid)
    # iTunes Store.
    if HAVE_ITUNES:
        yield itunes_art(album)
    # Amazon and AlbumArt.org.
    if album.asin:
        for url in art_for_asin(album.asin):
            yield url
        url = aao_art(album.asin)
        if url:
            yield url
    if config['fetchart']['google_search']:
        url = google_art(album)
        if url:
            yield url
    for s in config['fetchart']['sources']:
        fun = ART_FUNCS[s]
        for url in fun(album):
            if url:
                yield url
def art_for_album(album, paths, maxwidth=None, local_only=False):
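The rewritten _source_urls is now just a lookup in ART_FUNCS plus lazy chaining, so sources late in the configured list are only queried if the earlier ones are exhausted without usable art. Here is a standalone sketch of that pattern with hypothetical names; it is not the plugin's code.

    def candidate_urls(album, enabled, funcs):
        # Chain the per-source generators in the configured order. Iteration
        # stays lazy, so a source does no work unless it is actually reached.
        for name in enabled:
            for url in funcs[name](album):
                if url:
                    yield url

    funcs = {
        'fast': lambda album: iter(['http://fast.example/art.jpg']),
        'slow': lambda album: iter([]),  # imagine an expensive scraper here
    }
    first = next(candidate_urls(None, ['fast', 'slow'], funcs), None)
    print(first)  # http://fast.example/art.jpg; 'slow' is never invoked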
@@ -285,6 +279,22 @@ def art_for_album(album, paths, maxwidth=None, local_only=False):
        out = ArtResizer.shared.resize(maxwidth, out)
    return out
SOURCES_ALL = ['coverart', 'itunes', 'albumart', 'amazon', 'google']
def sanitize_sources(sources):
    """Remove unknown or duplicate sources while keeping original order.
    """
    seen = set()
    others_sources = set(SOURCES_ALL) - set(sources)
    res = []
    for s in sources:
        if s in SOURCES_ALL + ['*']:
            if not (s in seen or seen.add(s)):
                res.extend(list(others_sources) if s == '*' else [s])
    if not HAVE_ITUNES and 'itunes' in res:
        res.remove('itunes')
    return res
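A quick worked illustration of sanitize_sources, assuming the patched module is importable. Note that the sources pulled in by '*' arrive in arbitrary order because others_sources is a set, and 'itunes' is dropped whenever HAVE_ITUNES is false.

    from beetsplug import fetchart

    # Unknown names and duplicates are dropped; original order is kept.
    print(fetchart.sanitize_sources(['google', 'bogus', 'google']))
    # ['google']

    # '*' expands to every known source that was not listed explicitly.
    print(fetchart.sanitize_sources(['amazon', '*']))
    # ['amazon', ...remaining sources in arbitrary order...]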
# PLUGIN LOGIC ###############################################################
@@ -325,6 +335,7 @@ class FetchArtPlugin(BeetsPlugin):
            'cautious': False,
            'google_search': False,
            'cover_names': ['cover', 'front', 'art', 'album', 'folder'],
            'sources': SOURCES_ALL,
        })
        # Holds paths to downloaded images between fetching them and
@@ -337,6 +348,10 @@ class FetchArtPlugin(BeetsPlugin):
            self.import_stages = [self.fetch_art]
            self.register_listener('import_task_files', self.assign_art)
        # Remove unknown and duplicate sources
        self.config['sources'] = sanitize_sources(
            self.config['sources'].as_str_seq())
    # Asynchronous; after music is added to the library.
    def fetch_art(self, session, task):
        """Find art for the album being imported."""

View file

@@ -45,6 +45,8 @@ file. The available options are:
  as fallback.
  Default: ``no``; remote (Web) art sources are only queried if no local art is
  found in the filesystem.
- **sources**: List of remote art sources to search, in the order in which they
  should be queried.
  Default: ``['coverart', 'itunes', 'albumart', 'amazon', 'google']``, i.e., all
  available sources.

Here's an example that makes the plugin select only images that contain *front* or
*back* keywords in their filenames::

View file

@@ -12,6 +12,8 @@
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
from beetsplug import fetchart
import os.path
from _common import unittest
from helper import TestHelper
@@ -41,6 +43,17 @@ class FetchartCliTest(unittest.TestCase, TestHelper):
        with open(cover_path, 'r') as f:
            self.assertEqual(f.read(), 'IMAGE')
    def test_sanitize_sources(self):
        self.assertEqual(fetchart.sanitize_sources(['google', 'unknown']),
                         ['google'])
        self.assertEqual(fetchart.sanitize_sources(['google', 'google']),
                         ['google'])
        res = fetchart.sanitize_sources(['google', '*', 'amazon'])
        # don't check strict equality on lengths, as the itunes source may be
        # removed by the plugin
        self.assertTrue(len(res) >= len(fetchart.SOURCES_ALL) - 1 and
                        res[0] == 'google' and res[-1] == 'amazon')
def suite():
    return unittest.TestLoader().loadTestsFromName(__name__)