mirror of
https://github.com/beetbox/beets.git
synced 2025-12-24 09:33:46 +01:00
albumart.org scraper art source (#272)
This commit is contained in:
parent
00e7523374
commit
700c7cd9f8
3 changed files with 110 additions and 12 deletions
|
|
@ -18,6 +18,7 @@ import urllib
|
|||
import sys
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
from beets.autotag.mb import album_for_id
|
||||
|
||||
|
|
@ -28,29 +29,64 @@ COVER_NAMES = ['cover', 'front', 'art', 'album', 'folder']
|
|||
log = logging.getLogger('beets')
|
||||
|
||||
|
||||
CONTENT_TYPES = ('image/jpeg',)
|
||||
def _fetch_image(url):
    """Retrieve ``url`` to a local file and check the content type the
    server reported for it. Returns the path of the downloaded file
    when that type is listed in CONTENT_TYPES; returns None when the
    download fails or the type is not accepted.
    """
    log.debug('Downloading art: %s' % url)
    try:
        path, info = urllib.urlretrieve(url)
    except IOError:
        log.debug('error fetching art')
        return

    # Anything that is not one of the accepted image types is rejected.
    if info.gettype() not in CONTENT_TYPES:
        log.debug('Not an image.')
        return

    log.debug('Downloaded art to: %s' % path)
    return path
|
||||
|
||||
|
||||
# Art from Amazon.
|
||||
|
||||
AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg'
|
||||
AMAZON_INDICES = (1,2)
|
||||
AMAZON_CONTENT_TYPE = 'image/jpeg'
|
||||
def art_for_asin(asin):
    """Fetches art for an Amazon ID (ASIN) string.

    Each index in AMAZON_INDICES is tried in order. Returns the path of
    the first image that ``_fetch_image`` manages to download, or None
    if no candidate URL yields an image.
    """
    for index in AMAZON_INDICES:
        url = AMAZON_URL % (asin, index)
        # Downloading, error logging and the content-type check all live
        # in _fetch_image; doing them here as well would fetch every URL
        # twice.
        fn = _fetch_image(url)
        if fn:
            return fn
    return None
|
||||
|
||||
|
||||
# AlbumArt.org scraper.
|
||||
|
||||
AAO_URL = 'http://www.albumart.org/index_detail.php'
|
||||
AAO_PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"'
|
||||
def aao_art(asin):
    """Fetches art for an Amazon ID (ASIN) string by scraping the
    albumart.org detail page for that ASIN.

    Returns the path of the downloaded image as produced by
    ``_fetch_image``, or None if the page cannot be read, contains no
    "View larger image" link, or the linked file is rejected.
    """
    # Get the page from albumart.org.
    url = '%s?%s' % (AAO_URL, urllib.urlencode({'asin': asin}))
    log.debug('Scraping art URL: %s' % url)
    try:
        response = urllib.urlopen(url)
        try:
            page = response.read()
        finally:
            # Always release the connection, even if reading fails.
            response.close()
    except IOError:
        log.debug('Error scraping art page')
        return None

    # Search the page for the image URL.
    m = re.search(AAO_PAT, page)
    if not m:
        log.debug('No image found on page')
        return None
    return _fetch_image(m.group(1))
|
||||
|
||||
|
||||
# Art from the filesystem.
|
||||
|
||||
def art_in_path(path):
|
||||
|
|
@ -91,7 +127,10 @@ def art_for_album(album, path):
|
|||
|
||||
if album.asin:
|
||||
log.debug('Fetching album art for ASIN %s.' % album.asin)
|
||||
return art_for_asin(album.asin)
|
||||
out = art_for_asin(album.asin)
|
||||
if out:
|
||||
return out
|
||||
return aao_art(album.asin)
|
||||
else:
|
||||
log.debug('No ASIN available: no art found.')
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ Changelog
|
|||
via the ``ignore`` setting; see :doc:`/reference/config`.
|
||||
* The database now keeps track of files' modification times so that, during
|
||||
an ``update``, unmodified files can be skipped. (Thanks to Jos van der Til.)
|
||||
* The album art fetcher now uses `albumart.org`_ as a fallback when the Amazon
|
||||
art downloader fails.
|
||||
* A new ``timeout`` config value avoids database locking errors on slow systems.
|
||||
* Fix a crash after using the "as Tracks" option during import.
|
||||
* Fix a Unicode error when tagging items with missing titles.
|
||||
|
|
@ -53,6 +55,7 @@ Changelog
|
|||
.. _acoustid: http://acoustid.org/
|
||||
.. _Peter Brunner: https://github.com/Lugoues
|
||||
.. _Simon Chopin: https://github.com/laarmen
|
||||
.. _albumart.org: http://www.albumart.org/
|
||||
|
||||
1.0b10 (September 22, 2011)
|
||||
---------------------------
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from beets.autotag import art
|
|||
from beets.autotag import AlbumInfo
|
||||
import os
|
||||
import shutil
|
||||
import StringIO
|
||||
|
||||
class MockHeaders(object):
|
||||
def __init__(self, typeval):
|
||||
|
|
@ -31,7 +32,9 @@ class MockUrlRetrieve(object):
|
|||
def __init__(self, pathval, typeval):
    """Store the canned path and content type this mock hands back."""
    # No URL has been requested through the mock yet.
    self.fetched = None
    self.headers = MockHeaders(typeval)
    self.pathval = pathval
|
||||
def __call__(self, url):
    """Record the requested URL and return the canned
    ``(path, headers)`` pair.
    """
    self.fetched = url
    canned = (self.pathval, self.headers)
    return canned
|
||||
|
||||
class AmazonArtTest(unittest.TestCase):
|
||||
|
|
@ -72,8 +75,16 @@ class CombinedTest(unittest.TestCase):
|
|||
def setUp(self):
    """Create the scratch art directory and stub out ``urlopen``."""
    self.dpath = os.path.join(_common.RSRC, 'arttest')
    os.mkdir(self.dpath)
    # Remember both urllib entry points: individual tests replace
    # urlretrieve with a mock, and it must not leak into later tests.
    self.old_urlopen = art.urllib.urlopen
    self.old_urlretrieve = art.urllib.urlretrieve
    art.urllib.urlopen = self._urlopen
    self.page_text = ""
    # Start every test with a clean "was the scraper hit?" flag.
    self.urlopen_called = False

def tearDown(self):
    """Remove the scratch directory and restore the real urllib
    functions.
    """
    shutil.rmtree(self.dpath)
    art.urllib.urlopen = self.old_urlopen
    art.urllib.urlretrieve = self.old_urlretrieve

def _urlopen(self, url):
    """Stand-in for ``urllib.urlopen``: note the call and serve
    ``self.page_text`` as a file-like object.
    """
    self.urlopen_called = True
    return StringIO.StringIO(self.page_text)
|
||||
|
||||
def test_main_interface_returns_amazon_art(self):
|
||||
art.urllib.urlretrieve = MockUrlRetrieve('anotherpath', 'image/jpeg')
|
||||
|
|
@ -99,6 +110,51 @@ class CombinedTest(unittest.TestCase):
|
|||
artpath = art.art_for_album(album, self.dpath)
|
||||
self.assertEqual(artpath, 'anotherpath')
|
||||
|
||||
def test_main_interface_tries_amazon_before_aao(self):
    """When the Amazon download yields a JPEG, the albumart.org page
    must never be opened.
    """
    art.urllib.urlretrieve = MockUrlRetrieve('anotherpath', 'image/jpeg')
    self.urlopen_called = False
    info = AlbumInfo(None, None, None, None, None, asin='xxxx')
    art.art_for_album(info, self.dpath)
    self.assertFalse(self.urlopen_called)
|
||||
|
||||
def test_main_interface_falls_back_to_aao(self):
    """When the Amazon download is not an image, the albumart.org page
    must be opened as a fallback.
    """
    art.urllib.urlretrieve = MockUrlRetrieve('anotherpath', 'text/html')
    self.urlopen_called = False
    info = AlbumInfo(None, None, None, None, None, asin='xxxx')
    art.art_for_album(info, self.dpath)
    self.assertTrue(self.urlopen_called)
|
||||
|
||||
class AAOTest(unittest.TestCase):
    """Exercises ``art.aao_art`` with both urllib entry points replaced
    by in-memory stand-ins.
    """

    def setUp(self):
        self.page_text = ''
        self.retriever = MockUrlRetrieve('somepath', 'image/jpeg')
        # Keep the real functions so tearDown can put them back.
        self.old_urlopen = art.urllib.urlopen
        self.old_urlretrieve = art.urllib.urlretrieve
        art.urllib.urlopen = self._urlopen
        art.urllib.urlretrieve = self.retriever

    def tearDown(self):
        art.urllib.urlretrieve = self.old_urlretrieve
        art.urllib.urlopen = self.old_urlopen

    def _urlopen(self, url):
        # Serve the canned page text as a file-like object.
        return StringIO.StringIO(self.page_text)

    def test_aao_scraper_finds_image(self):
        self.page_text = """
<br />
<a href="TARGET_URL" title="View larger image" class="thickbox" style="color: #7E9DA2; text-decoration:none;">
<img src="http://www.albumart.org/images/zoom-icon.jpg" alt="View larger image" width="17" height="15" border="0"/></a>
"""
        result = art.aao_art('x')
        self.assertEqual(self.retriever.fetched, 'TARGET_URL')
        self.assertEqual(result, 'somepath')

    def test_aao_scraper_returns_none_when_no_image_present(self):
        self.page_text = "blah blah"
        result = art.aao_art('x')
        self.assertEqual(self.retriever.fetched, None)
        self.assertEqual(result, None)
|
||||
|
||||
def suite():
    """Build a test suite from every test case defined in this module."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromName(__name__)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue