Merge pull request #1075 from Kraymer/1066-fetchart_sources

fetchart: add 'sources' option
This commit is contained in:
Adrian Sampson 2014-11-11 10:04:45 -08:00
commit 288fb0da4c
4 changed files with 97 additions and 58 deletions

View file

@ -79,17 +79,14 @@ CAA_URL = 'http://coverartarchive.org/release/{mbid}/front-500.jpg'
CAA_GROUP_URL = 'http://coverartarchive.org/release-group/{mbid}/front-500.jpg'
def caa_art(album):
    """Generate Cover Art Archive URLs for an album.

    Yields the release URL when the album has a MusicBrainz release ID
    (``mb_albumid``), then the release group URL when it has a
    MusicBrainz release group ID (``mb_releasegroupid``). Nothing is
    yielded when neither ID is set.
    """
    if album.mb_albumid:
        yield CAA_URL.format(mbid=album.mb_albumid)
    # The release group ID lives in the `mb_releasegroupid` field, like
    # the other MusicBrainz IDs on the album.
    if album.mb_releasegroupid:
        yield CAA_GROUP_URL.format(mbid=album.mb_releasegroupid)
# Art from Amazon.
@ -98,10 +95,12 @@ AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg'
AMAZON_INDICES = (1, 2)
def art_for_asin(asin):
"""Generate URLs for an Amazon ID (ASIN) string."""
for index in AMAZON_INDICES:
yield AMAZON_URL % (asin, index)
def art_for_asin(album):
"""Generate URLs using Amazon ID (ASIN) string.
"""
if album.asin:
for index in AMAZON_INDICES:
yield AMAZON_URL % (album.asin, index)
# AlbumArt.org scraper.
@ -110,11 +109,14 @@ AAO_URL = 'http://www.albumart.org/index_detail.php'
AAO_PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"'
def aao_art(asin):
"""Return art URL from AlbumArt.org given an ASIN."""
def aao_art(album):
"""Return art URL from AlbumArt.org using album ASIN.
"""
if not album.asin:
return
# Get the page from albumart.org.
try:
resp = requests_session.get(AAO_URL, params={'asin': asin})
resp = requests_session.get(AAO_URL, params={'asin': album.asin})
log.debug(u'fetchart: scraped art URL: {0}'.format(resp.url))
except requests.RequestException:
log.debug(u'fetchart: error scraping art page')
@ -124,7 +126,7 @@ def aao_art(asin):
m = re.search(AAO_PAT, resp.text)
if m:
image_url = m.group(1)
return image_url
yield image_url
else:
log.debug(u'fetchart: no image found on page')
@ -138,6 +140,8 @@ def google_art(album):
"""Return art URL from google.org given an album title and
interpreter.
"""
if not (album.albumartist and album.album):
return
search_string = (album.albumartist + ',' + album.album).encode('utf-8')
response = requests_session.get(GOOGLE_URL, params={
'v': '1.0',
@ -151,7 +155,7 @@ def google_art(album):
data = results['responseData']
dataInfo = data['results']
for myUrl in dataInfo:
return myUrl['unescapedUrl']
yield myUrl['unescapedUrl']
except:
log.debug(u'fetchart: error scraping art page')
return
@ -168,7 +172,7 @@ def itunes_art(album):
if itunes_album.get_artwork()['100']:
small_url = itunes_album.get_artwork()['100']
big_url = small_url.replace('100x100', '1200x1200')
return big_url
yield big_url
else:
log.debug(u'fetchart: album has no artwork in iTunes Store')
except IndexError:
@ -177,6 +181,7 @@ def itunes_art(album):
# Art from the filesystem.
def filename_priority(filename, cover_names):
"""Sort order for image names.
@ -188,7 +193,8 @@ def filename_priority(filename, cover_names):
def art_in_path(path, cover_names, cautious):
"""Look for album art files in a specified directory."""
"""Look for album art files in a specified directory.
"""
if not os.path.isdir(path):
return
@ -219,35 +225,27 @@ def art_in_path(path, cover_names, cautious):
# Try each source in turn.
SOURCES_ALL = [u'coverart', u'itunes', u'amazon', u'albumart', u'google']
def _source_urls(album):
ART_FUNCS = {
u'coverart': caa_art,
u'itunes': itunes_art,
u'albumart': aao_art,
u'amazon': art_for_asin,
u'google': google_art,
}
def _source_urls(album, sources=SOURCES_ALL):
"""Generate possible source URLs for an album's art. The URLs are
not guaranteed to work so they each need to be attempted in turn.
This allows the main `art_for_album` function to abort iteration
through this sequence early to avoid the cost of scraping when not
necessary.
"""
# Cover Art Archive.
if album.mb_albumid:
yield caa_art(album.mb_albumid)
if album.mb_releasegroupid:
yield caa_group_art(album.mb_releasegroupid)
# iTunes Store.
if HAVE_ITUNES:
yield itunes_art(album)
# Amazon and AlbumArt.org.
if album.asin:
for url in art_for_asin(album.asin):
yield url
url = aao_art(album.asin)
if url:
yield url
if config['fetchart']['google_search']:
url = google_art(album)
if url:
for s in sources:
urls = ART_FUNCS[s](album)
for url in urls:
yield url
@ -273,7 +271,8 @@ def art_for_album(album, paths, maxwidth=None, local_only=False):
# Web art sources.
remote_priority = config['fetchart']['remote_priority'].get(bool)
if not local_only and (remote_priority or not out):
for url in _source_urls(album):
for url in _source_urls(album,
config['fetchart']['sources'].as_str_seq()):
if maxwidth:
url = ArtResizer.shared.proxy_url(maxwidth, url)
candidate = _fetch_image(url)
@ -286,6 +285,20 @@ def art_for_album(album, paths, maxwidth=None, local_only=False):
return out
def sanitize_sources(sources):
    """Remove unknown or duplicate sources while keeping original order.

    `sources` is a sequence of source names. Names that are not in
    `SOURCES_ALL` are dropped, and only the first occurrence of each
    name is kept. The special name ``'*'`` expands, at the position of
    its first occurrence, to every source of `SOURCES_ALL` that is not
    listed explicitly in `sources`. The 'itunes' source is removed from
    the result when `HAVE_ITUNES` is false.

    Returns a new list; `sources` is not modified.
    """
    sources = list(sources)
    requested = set(sources)
    # Expand '*' following the order of SOURCES_ALL rather than the
    # arbitrary iteration order of a set, so the resulting priority of
    # the implicit sources is deterministic.
    remaining = [s for s in SOURCES_ALL if s not in requested]

    seen = set()
    res = []
    for s in sources:
        if s in seen:
            continue
        if s == '*':
            seen.add(s)
            res.extend(remaining)
        elif s in SOURCES_ALL:
            seen.add(s)
            res.append(s)

    if not HAVE_ITUNES:
        res = [s for s in res if s != 'itunes']
    return res
# PLUGIN LOGIC ###############################################################
@ -325,6 +338,7 @@ class FetchArtPlugin(BeetsPlugin):
'cautious': False,
'google_search': False,
'cover_names': ['cover', 'front', 'art', 'album', 'folder'],
'sources': SOURCES_ALL,
})
# Holds paths to downloaded images between fetching them and
@ -337,6 +351,9 @@ class FetchArtPlugin(BeetsPlugin):
self.import_stages = [self.fetch_art]
self.register_listener('import_task_files', self.assign_art)
self.config['sources'] = sanitize_sources(
self.config['sources'].as_str_seq())
# Asynchronous; after music is added to the library.
def fetch_art(self, session, task):
"""Find art for the album being imported."""

View file

@ -35,7 +35,7 @@ file. The available options are:
contain one of the keywords in ``cover_names``.
Default: ``no``.
- **cover_names**: Prioritize images containing words in this list.
Default: ``['cover', 'front', 'art', 'album', 'folder']``.
Default: ``cover front art album folder``.
- **google_search**: Gather images from Google Image Search.
Default: ``no``.
- **maxwidth**: A maximum image width to downscale fetched images if they are
@ -45,13 +45,18 @@ file. The available options are:
as fallback.
Default: ``no``; remote (Web) art sources are only queried if no local art is
found in the filesystem.
- **sources**: List of web sources to query when searching for art, in order
of priority. The star character ``*`` expands to all remaining sources that
are not listed explicitly.
Default: ``coverart itunes amazon albumart google``, i.e., all sources.
Here's an example that makes the plugin select only images that contain *front* or
*back* keywords in their filenames::
*back* keywords in their filenames and prioritizes ``itunes`` over the other
sources::
fetchart:
cautious: true
cover_names: front back
sources: itunes *
Manually Fetching Album Art
@ -90,9 +95,10 @@ environment variable so that ImageMagick comes first or use PIL instead.
Album Art Sources
-----------------
Currently, this plugin searches for art in the local filesystem as well as on
the Cover Art Archive, the iTunes Store (optionally), Amazon, AlbumArt.org,
and Google Image Search (optionally), in that order.
By default, this plugin searches for art in the local filesystem as well as on
the Cover Art Archive, the iTunes Store, Amazon, AlbumArt.org,
and Google Image Search, in that order. You can remove sources to speed up the
process, or reorder them using the ``sources`` configuration option.
When looking for local album art, beets checks for image files located in the
same folder as the music files you're importing. Beets prefers to use an image

View file

@ -104,7 +104,7 @@ class CombinedTest(_common.TestCase):
os.mkdir(self.dpath)
# Set up configuration.
fetchart.FetchArtPlugin()
self.plugin = fetchart.FetchArtPlugin()
@responses.activate
def run(self, *args, **kwargs):
@ -161,7 +161,8 @@ class CombinedTest(_common.TestCase):
def test_local_only_does_not_access_network(self):
album = _common.Bag(mb_albumid=self.MBID, asin=self.ASIN)
artpath = fetchart.art_for_album(album, [self.dpath], local_only=True)
artpath = fetchart.art_for_album(album, [self.dpath],
local_only=True)
self.assertEqual(artpath, None)
self.assertEqual(len(responses.calls), 0)
@ -195,13 +196,15 @@ class AAOTest(_common.TestCase):
alt="View larger image" width="17" height="15" border="0"/></a>
"""
self.mock_response(self.AAO_URL, body)
res = fetchart.aao_art(self.ASIN)
self.assertEqual(res, 'TARGET_URL')
album = _common.Bag(asin=self.ASIN)
res = fetchart.aao_art(album)
self.assertEqual(list(res)[0], 'TARGET_URL')
def test_aao_scraper_returns_none_when_no_image_present(self):
def test_aao_scraper_returns_no_result_when_no_image_present(self):
self.mock_response(self.AAO_URL, 'blah blah')
res = fetchart.aao_art(self.ASIN)
self.assertEqual(res, None)
album = _common.Bag(asin=self.ASIN)
res = fetchart.aao_art(album)
self.assertEqual(list(res), [])
class GoogleImageTest(_common.TestCase):
@ -222,14 +225,14 @@ class GoogleImageTest(_common.TestCase):
[{"unescapedUrl": "url_to_the_image"}]}}"""
self.mock_response(self._google_url, json)
result_url = fetchart.google_art(album)
self.assertEqual(result_url, 'url_to_the_image')
self.assertEqual(list(result_url)[0], 'url_to_the_image')
def test_google_art_dont_finds_image(self):
album = _common.Bag(albumartist="some artist", album="some album")
json = """bla blup"""
self.mock_response(self._google_url, json)
result_url = fetchart.google_art(album)
self.assertEqual(result_url, None)
self.assertEqual(list(result_url), [])
class ArtImporterTest(_common.TestCase):

View file

@ -12,6 +12,8 @@
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
from beetsplug import fetchart
import os.path
from _common import unittest
from helper import TestHelper
@ -41,6 +43,17 @@ class FetchartCliTest(unittest.TestCase, TestHelper):
with open(cover_path, 'r') as f:
self.assertEqual(f.read(), 'IMAGE')
def test_sanitize_sources(self):
self.assertEqual(fetchart.sanitize_sources(['google', 'unknown']),
['google'])
self.assertEqual(fetchart.sanitize_sources(['google', 'google']),
['google'])
res = fetchart.sanitize_sources(['google', '*', 'amazon'])
# don't check strict equality on lengths as the itunes source may be removed
# by the plugin
self.assertTrue(len(res) >= len(fetchart.SOURCES_ALL) - 1 and
res[0] == 'google' and res[-1] == 'amazon')
def suite():
return unittest.TestLoader().loadTestsFromName(__name__)