Merge branch 'master' into fetchart_unify_sources,

i.e. adapt the fanart.tv source
This commit is contained in:
wordofglass 2016-04-16 13:34:46 +02:00
commit 9ce10c2fda
11 changed files with 348 additions and 46 deletions

View file

@ -40,12 +40,18 @@ class ParseError(Exception):
"""
def edit(filename):
def edit(filename, log):
"""Open `filename` in a text editor.
"""
cmd = util.shlex_split(util.editor_command())
cmd.append(filename)
subprocess.call(cmd)
log.debug(u'invoking editor command: {!r}', cmd)
try:
subprocess.call(cmd)
except OSError as exc:
raise ui.UserError(u'could not run editor command {!r}: {}'.format(
cmd[0], exc
))
def dump(arg):
@ -245,7 +251,7 @@ class EditPlugin(plugins.BeetsPlugin):
try:
while True:
# Ask the user to edit the data.
edit(new.name)
edit(new.name, self._log)
# Read the data back after editing and check whether anything
# changed.

View file

@ -53,14 +53,15 @@ class Candidate(object):
MATCH_EXACT = 0
MATCH_FALLBACK = 1
def __init__(self, log, path=None, url=None, source=u'', match=None):
def __init__(self, log, path=None, url=None, source=u'',
match=None, size=None):
self._log = log
self.path = path
self.url = url
self.source = source
self.check = None
self.match = match
self.size = None
self.size = size
def _validate(self, extra):
"""Determine whether the candidate artwork is valid based on
@ -79,7 +80,8 @@ class Candidate(object):
return self.CANDIDATE_EXACT
# get_size returns None if no local imaging backend is available
self.size = ArtResizer.shared.get_size(self.path)
if not self.size:
self.size = ArtResizer.shared.get_size(self.path)
self._log.debug(u'image size: {}', self.size)
if not self.size:
@ -296,7 +298,7 @@ class GoogleImages(RemoteArtSource):
URL = u'https://www.googleapis.com/customsearch/v1'
def __init__(self, *args, **kwargs):
super(RemoteArtSource, self).__init__(*args, **kwargs)
super(GoogleImages, self).__init__(*args, **kwargs)
self.key = self._config['google_key'].get(),
self.cx = self._config['google_engine'].get(),
@ -333,6 +335,66 @@ class GoogleImages(RemoteArtSource):
match=Candidate.MATCH_EXACT)
class FanartTV(ArtSource):
    """Art source that queries the fanart.tv JSON API for album covers.

    Albums are looked up by their MusicBrainz release-group ID. Requests
    carry the project-wide API key, plus the user's personal client key
    (the ``fanarttv_key`` config option) when one is configured.
    """
    # NOTE(review): this inherits ArtSource but calls self.request and
    # self._candidate like the RemoteArtSource subclasses do -- confirm
    # the intended base class.
    NAME = u"fanart.tv"
    API_URL = 'http://webservice.fanart.tv/v3/'
    API_ALBUMS = API_URL + 'music/albums/'
    # Shared project-level API key for the beets project (public).
    PROJECT_KEY = '61a7d0ab4e67162b7a0c7c35915cd48e'

    def __init__(self, *args, **kwargs):
        super(FanartTV, self).__init__(*args, **kwargs)
        # Optional personal key from the user's config (may be None).
        self.client_key = self._config['fanarttv_key'].get()

    def get(self, album, extra):
        """Yield `Candidate` objects with album-cover URLs for `album`.

        Yields nothing when the album has no MusicBrainz release-group
        ID, the response is not valid JSON, or the API reports an error.
        """
        if not album.mb_releasegroupid:
            return
        response = self.request(
            self.API_ALBUMS + album.mb_releasegroupid,
            headers={'api-key': self.PROJECT_KEY,
                     'client-key': self.client_key})
        try:
            data = response.json()
        except ValueError:
            self._log.debug(u'fanart.tv: error loading response: {}',
                            response.text)
            return
        if u'status' in data and data[u'status'] == u'error':
            if u'not found' in data[u'error message'].lower():
                self._log.debug(u'fanart.tv: no image found')
            elif u'api key' in data[u'error message'].lower():
                self._log.warning(u'fanart.tv: Invalid API key given, please '
                                  u'enter a valid one in your config file.')
            else:
                self._log.debug(u'fanart.tv: error on request: {}',
                                data[u'error message'])
            return
        matches = []
        # can there be more than one releasegroupid per response?
        for mb_releasegroupid in data.get(u'albums', dict()):
            if album.mb_releasegroupid == mb_releasegroupid:
                # note: there might be more art referenced, e.g. cdart
                matches.extend(
                    data[u'albums'][mb_releasegroupid][u'albumcover'])
            # can this actually occur?
            else:
                self._log.debug(u'fanart.tv: unexpected mb_releasegroupid in '
                                u'response!')
        # NOTE(review): 'likes' arrives as a string (e.g. "0"), so this sorts
        # lexicographically, not numerically -- verify that is intended.
        matches.sort(key=lambda x: x[u'likes'], reverse=True)
        for item in matches:
            # fanart.tv has a strict size requirement for album art to be
            # uploaded
            yield self._candidate(url=item[u'url'],
                                  match=Candidate.MATCH_EXACT,
                                  size=(1000, 1000))
class ITunesStore(RemoteArtSource):
NAME = u"iTunes Store"
@ -554,7 +616,7 @@ class FileSystem(LocalArtSource):
SOURCES_ALL = [u'filesystem',
u'coverart', u'itunes', u'amazon', u'albumart',
u'wikipedia', u'google']
u'wikipedia', u'google', u'fanarttv']
ART_SOURCES = {
u'filesystem': FileSystem,
@ -564,6 +626,7 @@ ART_SOURCES = {
u'amazon': Amazon,
u'wikipedia': Wikipedia,
u'google': GoogleImages,
u'fanarttv': FanartTV,
}
SOURCE_NAMES = {v: k for k, v in ART_SOURCES.items()}
@ -585,8 +648,10 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
'coverart', 'itunes', 'amazon', 'albumart'],
'google_key': None,
'google_engine': u'001442825323518660753:hrh5ch1gjzm',
'fanarttv_key': None
})
self.config['google_key'].redact = True
self.config['fanarttv_key'].redact = True
# Holds paths to downloaded images between fetching them and
# placing them in the filesystem.
@ -614,6 +679,13 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
if not self.config['google_key'].get() and \
u'google' in available_sources:
available_sources.remove(u'google')
if not self.config['fanarttv_key'].get() and \
u'fanarttv' in available_sources:
self._log.warn(
u'fanart.tv source enabled, but no personal API given. This '
u'works as of now, however, fanart.tv prefers users to '
u'register a personal key. Additionaly this makes new art '
u'available shorter after its upload. See the documentation.')
sources_name = plugins.sanitize_choices(
self.config['sources'].as_str_seq(), available_sources)
if 'remote_priority' in self.config:

View file

@ -19,6 +19,7 @@ class ImportAddedPlugin(BeetsPlugin):
super(ImportAddedPlugin, self).__init__()
self.config.add({
'preserve_mtimes': False,
'preserve_write_mtimes': False,
})
# item.id for new items that were reimported
@ -37,6 +38,7 @@ class ImportAddedPlugin(BeetsPlugin):
register('item_linked', self.record_import_mtime)
register('album_imported', self.update_album_times)
register('item_imported', self.update_item_times)
register('after_write', self.update_after_write_time)
def check_config(self, task, session):
self.config['preserve_mtimes'].get(bool)
@ -120,3 +122,13 @@ class ImportAddedPlugin(BeetsPlugin):
self._log.debug(u"Import of item '{0}', selected item.added={1}",
util.displayable_path(item.path), item.added)
item.store()
def update_after_write_time(self, item):
    """Update the mtime of the item's file with the item.added value
    after each write of the item if `preserve_write_mtimes` is enabled.

    Registered for the ``after_write`` event; does nothing when the
    item has no ``added`` timestamp.
    """
    if item.added:
        if self.config['preserve_write_mtimes'].get(bool):
            # NOTE(review): nesting reconstructed from a diff with stripped
            # indentation -- the debug log is assumed to fire only when the
            # mtime was actually re-set; confirm against the original file.
            self.write_item_mtime(item, item.added)
            self._log.debug(u"Write of item '{0}', selected item.added={1}",
                            util.displayable_path(item.path), item.added)

View file

@ -16,15 +16,15 @@
"""Fetches, embeds, and displays lyrics.
"""
from __future__ import division, absolute_import, print_function
from __future__ import absolute_import, division, print_function
import re
import requests
import json
import unicodedata
import urllib
import difflib
import itertools
import json
import re
import requests
import unicodedata
import urllib
import warnings
from HTMLParser import HTMLParseError
@ -56,7 +56,7 @@ URL_CHARACTERS = {
def unescape(text):
"""Resolves &#xxx; HTML entities (and some others)."""
"""Resolve &#xxx; HTML entities (and some others)."""
if isinstance(text, bytes):
text = text.decode('utf8', 'ignore')
out = text.replace(u' ', u' ')
@ -331,8 +331,12 @@ class LyricsWiki(SymbolsReplaced):
html = self.fetch_url(url)
if not html:
return
lyrics = extract_text_in(unescape(html), u"<div class='lyricbox'>")
lyrics = scrape_lyrics_from_html(lyrics)
# Get the HTML fragment inside the appropriate HTML element and then
# extract the text from it.
html_frag = extract_text_in(unescape(html), u"<div class='lyricbox'>")
lyrics = scrape_lyrics_from_html(html_frag)
if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
return lyrics
@ -430,8 +434,13 @@ def scrape_lyrics_from_html(html):
parse_only=SoupStrainer(text=is_text_notcode))
except HTMLParseError:
return None
soup = sorted(soup.stripped_strings, key=len)[-1]
return soup
# Get the longest text element (if any).
strings = sorted(soup.stripped_strings, key=len, reverse=True)
if strings:
return strings[0]
else:
return None
class Google(Backend):
@ -446,29 +455,29 @@ class Google(Backend):
"""
if not text:
return False
badTriggersOcc = []
nbLines = text.count('\n')
if nbLines <= 1:
bad_triggers_occ = []
nb_lines = text.count('\n')
if nb_lines <= 1:
self._log.debug(u"Ignoring too short lyrics '{0}'", text)
return False
elif nbLines < 5:
badTriggersOcc.append('too_short')
elif nb_lines < 5:
bad_triggers_occ.append('too_short')
else:
# Lyrics look legit, remove credits to avoid being penalized
# further down
text = remove_credits(text)
badTriggers = ['lyrics', 'copyright', 'property', 'links']
bad_triggers = ['lyrics', 'copyright', 'property', 'links']
if artist:
badTriggersOcc += [artist]
bad_triggers_occ += [artist]
for item in badTriggers:
badTriggersOcc += [item] * len(re.findall(r'\W%s\W' % item,
text, re.I))
for item in bad_triggers:
bad_triggers_occ += [item] * len(re.findall(r'\W%s\W' % item,
text, re.I))
if badTriggersOcc:
self._log.debug(u'Bad triggers detected: {0}', badTriggersOcc)
return len(badTriggersOcc) < 2
if bad_triggers_occ:
self._log.debug(u'Bad triggers detected: {0}', bad_triggers_occ)
return len(bad_triggers_occ) < 2
def slugify(self, text):
"""Normalize a string and remove non-alphanumeric characters.
@ -561,6 +570,9 @@ class LyricsPlugin(plugins.BeetsPlugin):
self.import_stages = [self.imported]
self.config.add({
'auto': True,
'bing_client_secret': None,
'bing_lang_from': [],
'bing_lang_to': None,
'google_API_key': None,
'google_engine_ID': u'009217259823014548361:lndtuqkycfu',
'genius_api_key':
@ -570,6 +582,7 @@ class LyricsPlugin(plugins.BeetsPlugin):
'force': False,
'sources': self.SOURCES,
})
self.config['bing_client_secret'].redact = True
self.config['google_API_key'].redact = True
self.config['google_engine_ID'].redact = True
self.config['genius_api_key'].redact = True
@ -583,6 +596,27 @@ class LyricsPlugin(plugins.BeetsPlugin):
self.backends = [self.SOURCE_BACKENDS[key](self.config, self._log)
for key in self.config['sources'].as_str_seq()]
self.config['bing_lang_from'] = [
x.lower() for x in self.config['bing_lang_from'].as_str_seq()]
self.bing_auth_token = None
def get_bing_access_token(self):
    """Request an OAuth access token for the Microsoft (Bing) Translator
    API and return it as a ``"Bearer ..."`` header value.

    Returns None (after logging a warning) when the response contains no
    access token, e.g. when ``bing_client_secret`` is wrong.
    """
    params = {
        'client_id': 'beets',
        # NOTE(review): this passes the config view object itself rather
        # than .get() -- confirm urlencode stringifies it as intended.
        'client_secret': self.config['bing_client_secret'],
        'scope': 'http://api.microsofttranslator.com',
        'grant_type': 'client_credentials',
    }
    oauth_url = 'https://datamarket.accesscontrol.windows.net/v2/OAuth2-13'
    oauth_token = json.loads(requests.post(
        oauth_url,
        data=urllib.urlencode(params)).content)
    if 'access_token' in oauth_token:
        return "Bearer " + oauth_token['access_token']
    else:
        self._log.warning(u'Could not get Bing Translate API access token.'
                          u' Check your "bing_client_secret" password')
def commands(self):
cmd = ui.Subcommand('lyrics', help='fetch song lyrics')
@ -638,6 +672,16 @@ class LyricsPlugin(plugins.BeetsPlugin):
if lyrics:
self._log.info(u'fetched lyrics: {0}', item)
if self.config['bing_client_secret'].get():
from langdetect import detect
lang_from = detect(lyrics)
if self.config['bing_lang_to'].get() != lang_from and (
not self.config['bing_lang_from'] or (
lang_from in self.config[
'bing_lang_from'].as_str_seq())):
lyrics = self.append_translation(
lyrics, self.config['bing_lang_to'])
else:
self._log.info(u'lyrics not found: {0}', item)
fallback = self.config['fallback'].get()
@ -645,11 +689,10 @@ class LyricsPlugin(plugins.BeetsPlugin):
lyrics = fallback
else:
return
item.lyrics = lyrics
if write:
item.try_write()
print(lyrics)
item.store()
def get_lyrics(self, artist, title):
@ -662,3 +705,30 @@ class LyricsPlugin(plugins.BeetsPlugin):
self._log.debug(u'got lyrics from backend: {0}',
backend.__class__.__name__)
return _scrape_strip_cruft(lyrics, True)
def append_translation(self, text, to_lang):
    """Return `text` with each line followed by its Bing translation into
    `to_lang`, formatted as ``"original / translation"``.

    Returns `text` unchanged when the translation request fails for a
    reason other than an expired token; an expired token is refreshed
    and the call retried once.
    """
    import xml.etree.ElementTree as ET

    if not self.bing_auth_token:
        self.bing_auth_token = self.get_bing_access_token()
    if self.bing_auth_token:
        # Extract unique lines to limit API request size per song
        text_lines = set(text.split('\n'))
        url = ('http://api.microsofttranslator.com/v2/Http.svc/'
               'Translate?text=%s&to=%s' % ('|'.join(text_lines), to_lang))
        # NOTE(review): the header name carries a trailing space
        # ("Authorization ") -- looks like a typo; verify against the API.
        r = requests.get(url,
                         headers={"Authorization ": self.bing_auth_token})
        if r.status_code != 200:
            self._log.debug('translation API error {}: {}', r.status_code,
                            r.text)
            if 'token has expired' in r.text:
                # Drop the stale token and retry once; the recursion ends
                # because the freshly fetched token is not expired.
                self.bing_auth_token = None
                return self.append_translation(text, to_lang)
            return text
        lines_translated = ET.fromstring(r.text.encode('utf8')).text
        # Use a translation mapping dict to build resulting lyrics
        # NOTE(review): a source line containing '|' would desynchronize
        # this zip -- assumes the service echoes the '|' separators.
        translations = dict(zip(text_lines, lines_translated.split('|')))
        result = ''
        for line in text.split('\n'):
            result += '%s / %s\n' % (line, translations[line])
        return result

View file

@ -8,6 +8,15 @@ New features:
* :doc:`/plugins/convert`: A new `album_art_maxwidth` lets you resize album
art while copying it.
* :doc:`/plugins/importadded`: A new `preserve_write_mtimes` option
lets you preserve mtime of files after each write.
* :doc:`/plugins/lyrics`: The plugin can now translate the fetched lyrics to a
configured `bing_lang_to` language. Enabling translation requires registering
for a Microsoft Azure Marketplace free account. Thanks to :user:`Kraymer`.
* :doc:`/plugins/fetchart`: Album art can now be fetched from `fanart.tv`_.
Albums are matched using the ``mb_releasegroupid`` tag.
.. _fanart.tv: https://fanart.tv/
Fixes:
@ -22,6 +31,8 @@ Fixes:
LyricsWiki page markup. :bug:`1912` :bug:`1909`
* :doc:`/plugins/lyrics`: Also fix retrieval from Musixmatch and the way we
guess the URL for lyrics. :bug:`1880`
* :doc:`/plugins/edit`: Fail gracefully when the configured text editor
command can't be invoked. :bug:`1927`
1.3.17 (February 7, 2016)

View file

@ -46,16 +46,18 @@ file. The available options are:
- **sources**: List of sources to search for images. An asterisk `*` expands
to all available sources.
Default: ``filesystem coverart itunes amazon albumart``, i.e., everything but
``wikipedia`` and ``google``. Enable those two sources for more matches at
the cost of some speed. They are searched in the given order, thus in the
default config, no remote (Web) art source are queried if local art is
found in the filesystem. To use a local image as fallback, move it to the end
of the list.
``wikipedia``, ``google`` and ``fanarttv``. Enable those sources for more
matches at the cost of some speed. They are searched in the given order,
thus in the default config, no remote (Web) art sources are queried if
local art is found in the filesystem. To use a local image as fallback,
move it to the end of the list.
- **google_key**: Your Google API key (to enable the Google Custom Search
backend).
Default: None.
- **google_engine**: The custom search engine to use.
Default: The `beets custom search engine`_, which searches the entire web.
- **fanarttv_key**: The personal API key for requesting art from
fanart.tv. See below.
Note: ``minwidth`` and ``enforce_ratio`` options require either `ImageMagick`_
or `Pillow`_.
@ -161,6 +163,21 @@ default engine searches the entire web for cover art.
Note that the Google custom search API is limited to 100 queries per day.
After that, the fetchart plugin will fall back on other declared data sources.
Fanart.tv
'''''''''
Although not strictly necessary right now, you might think about
`registering a personal fanart.tv API key`_. Set the ``fanarttv_key``
configuration option to your key, then add ``fanarttv`` to the list of sources
in your configuration.
.. _registering a personal fanart.tv API key: https://fanart.tv/get-an-api-key/
More detailed information can be found `on their blog`_. Specifically, the
personal key will give you earlier access to new art.
.. _on their blog: https://fanart.tv/2015/01/personal-api-keys/
Embedding Album Art
-------------------

View file

@ -22,8 +22,11 @@ The ``item.added`` field is populated as follows:
set to the oldest mtime of the files in the album before they were imported.
The mtime of album directories is ignored.
This plugin can optionally be configured to also preserve mtimes using the
``preserve_mtimes`` option.
This plugin can optionally be configured to also preserve mtimes at
import using the ``preserve_mtimes`` option.
When the ``preserve_write_mtimes`` option is set, this plugin preserves
mtimes after each write to files using the ``item.added`` attribute.
File modification times are preserved as follows:
@ -40,9 +43,13 @@ Configuration
-------------
To configure the plugin, make an ``importadded:`` section in your
configuration file. There is one option available:
configuration file. There are two options available:
- **preserve_mtimes**: After writing files, re-set their mtimes to their
- **preserve_mtimes**: After importing files, re-set their mtimes to their
original value.
Default: ``no``.
- **preserve_write_mtimes**: After writing files, re-set their mtimes to their
original value.
Default: ``no``.

View file

@ -38,6 +38,14 @@ configuration file. The available options are:
- **auto**: Fetch lyrics automatically during import.
Default: ``yes``.
- **bing_client_secret**: Your Bing Translation application password
(to enable :ref:`lyrics-translation`).
- **bing_lang_from**: By default all lyrics with a language other than
``bing_lang_to`` are translated. Use a list of lang codes to restrict the set
of source languages to translate.
Default: ``[]``
- **bing_lang_to**: Language to translate lyrics into.
Default: None.
- **fallback**: By default, the file will be left unchanged when no lyrics are
found. Use the empty string ``''`` to reset the lyrics in such a case.
Default: None.
@ -113,3 +121,23 @@ After that, the lyrics plugin will fall back on other declared data sources.
.. _pip: http://www.pip-installer.org/
.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
.. _lyrics-translation:
Activate On-the-Fly Translation
-------------------------------
Using the Bing Translation API requires `langdetect`_, which you can install
using `pip`_ by typing::
pip install langdetect
You also need to register for a Microsoft Azure Marketplace free account and
subscribe to the `Microsoft Translator API`_. Follow the four-step process;
specifically, at step 3 enter ``beets`` as *Client ID* and copy/paste the
generated *Client secret* into your ``bing_client_secret`` configuration,
alongside the ``bing_lang_to`` target `language code`_.
.. _langdetect: https://pypi.python.org/pypi/langdetect
.. _Microsoft Translator API: https://www.microsoft.com/en-us/translator/getstarted.aspx
.. _language code: https://msdn.microsoft.com/en-us/library/hh456380.aspx

View file

@ -278,6 +278,82 @@ class GoogleImageTest(UseThePlugin):
next(self.source.get(album, self.extra))
class FanartTVTest(UseThePlugin):
    """Tests for the fanart.tv art source against mocked API responses."""

    # Successful response: several albumcover entries for the matching
    # release group, plus a cdart entry that the source must ignore.
    RESPONSE_MULTIPLE = u"""{
        "name": "artistname",
        "mbid_id": "artistid",
        "albums": {
            "thereleasegroupid": {
                "albumcover": [
                    {
                        "id": "24",
                        "url": "http://example.com/1.jpg",
                        "likes": "0"
                    },
                    {
                        "id": "42",
                        "url": "http://example.com/2.jpg",
                        "likes": "0"
                    },
                    {
                        "id": "23",
                        "url": "http://example.com/3.jpg",
                        "likes": "0"
                    }
                ],
                "cdart": [
                    {
                        "id": "123",
                        "url": "http://example.com/4.jpg",
                        "likes": "0",
                        "disc": "1",
                        "size": "1000"
                    }
                ]
            }
        }
    }"""
    # API-level error payload (e.g. unknown release group or bad key).
    RESPONSE_ERROR = u"""{
        "status": "error",
        "error message": "the error message"
    }"""
    # A body that is not valid JSON at all.
    RESPONSE_MALFORMED = u"bla blup"

    def setUp(self):
        super(FanartTVTest, self).setUp()
        self.source = fetchart.FanartTV(logger, self.plugin.config)
        self.extra = dict()

    # Wrap every test run so the `responses` mock intercepts HTTP calls.
    @responses.activate
    def run(self, *args, **kwargs):
        super(FanartTVTest, self).run(*args, **kwargs)

    def mock_response(self, url, json):
        """Register `json` as the canned GET response body for `url`."""
        responses.add(responses.GET, url, body=json,
                      content_type='application/json')

    def test_fanarttv_finds_image(self):
        # The first albumcover entry of the matching release group wins.
        album = _common.Bag(mb_releasegroupid=u'thereleasegroupid')
        self.mock_response(fetchart.FanartTV.API_ALBUMS + u'thereleasegroupid',
                           self.RESPONSE_MULTIPLE)
        candidate = next(self.source.get(album, self.extra))
        self.assertEqual(candidate.url, 'http://example.com/1.jpg')

    def test_fanarttv_returns_no_result_when_error_received(self):
        # An API error payload yields no candidates at all.
        album = _common.Bag(mb_releasegroupid=u'thereleasegroupid')
        self.mock_response(fetchart.FanartTV.API_ALBUMS + u'thereleasegroupid',
                           self.RESPONSE_ERROR)
        with self.assertRaises(StopIteration):
            next(self.source.get(album, self.extra))

    def test_fanarttv_returns_no_result_with_malformed_response(self):
        # Unparseable JSON is logged and swallowed, yielding no candidates.
        album = _common.Bag(mb_releasegroupid=u'thereleasegroupid')
        self.mock_response(fetchart.FanartTV.API_ALBUMS + u'thereleasegroupid',
                           self.RESPONSE_MALFORMED)
        with self.assertRaises(StopIteration):
            next(self.source.get(album, self.extra))
@_common.slow_test()
class ArtImporterTest(UseThePlugin):
def setUp(self):

View file

@ -47,7 +47,9 @@ class ModifyFileMocker(object):
if replacements:
self.action = self.replace_contents
def overwrite_contents(self, filename):
# The two methods below mock the `edit` utility function in the plugin.
def overwrite_contents(self, filename, log):
"""Modify `filename`, replacing its contents with `self.contents`. If
`self.contents` is empty, the file remains unchanged.
"""
@ -55,7 +57,7 @@ class ModifyFileMocker(object):
with codecs.open(filename, 'w', encoding='utf8') as f:
f.write(self.contents)
def replace_contents(self, filename):
def replace_contents(self, filename, log):
"""Modify `filename`, reading its contents and replacing the strings
specified in `self.replacements`.
"""

View file

@ -12,6 +12,7 @@ deps =
flask
mock
nose
nose-show-skipped
pyechonest
pylast
rarfile
@ -29,7 +30,7 @@ deps =
{[testenv]deps}
coverage
commands =
nosetests --with-coverage {posargs}
nosetests --show-skipped --with-coverage {posargs}
[testenv:py27setup]
basepython = python2.7