mirror of
https://github.com/beetbox/beets.git
synced 2026-01-03 22:42:44 +01:00
Merge pull request #1939 from Kraymer/lyrics_translate
Add lyrics translation using Microsoft Bing API
This commit is contained in:
commit
a282245011
3 changed files with 114 additions and 22 deletions
|
|
@ -16,15 +16,15 @@
|
|||
"""Fetches, embeds, and displays lyrics.
|
||||
"""
|
||||
|
||||
from __future__ import division, absolute_import, print_function
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import re
|
||||
import requests
|
||||
import json
|
||||
import unicodedata
|
||||
import urllib
|
||||
import difflib
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import requests
|
||||
import unicodedata
|
||||
import urllib
|
||||
import warnings
|
||||
from HTMLParser import HTMLParseError
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ URL_CHARACTERS = {
|
|||
|
||||
|
||||
def unescape(text):
|
||||
"""Resolves &#xxx; HTML entities (and some others)."""
|
||||
"""Resolve &#xxx; HTML entities (and some others)."""
|
||||
if isinstance(text, bytes):
|
||||
text = text.decode('utf8', 'ignore')
|
||||
out = text.replace(u' ', u' ')
|
||||
|
|
@ -455,29 +455,29 @@ class Google(Backend):
|
|||
"""
|
||||
if not text:
|
||||
return False
|
||||
badTriggersOcc = []
|
||||
nbLines = text.count('\n')
|
||||
if nbLines <= 1:
|
||||
bad_triggers_occ = []
|
||||
nb_lines = text.count('\n')
|
||||
if nb_lines <= 1:
|
||||
self._log.debug(u"Ignoring too short lyrics '{0}'", text)
|
||||
return False
|
||||
elif nbLines < 5:
|
||||
badTriggersOcc.append('too_short')
|
||||
elif nb_lines < 5:
|
||||
bad_triggers_occ.append('too_short')
|
||||
else:
|
||||
# Lyrics look legit, remove credits to avoid being penalized
|
||||
# further down
|
||||
text = remove_credits(text)
|
||||
|
||||
badTriggers = ['lyrics', 'copyright', 'property', 'links']
|
||||
bad_triggers = ['lyrics', 'copyright', 'property', 'links']
|
||||
if artist:
|
||||
badTriggersOcc += [artist]
|
||||
bad_triggers_occ += [artist]
|
||||
|
||||
for item in badTriggers:
|
||||
badTriggersOcc += [item] * len(re.findall(r'\W%s\W' % item,
|
||||
text, re.I))
|
||||
for item in bad_triggers:
|
||||
bad_triggers_occ += [item] * len(re.findall(r'\W%s\W' % item,
|
||||
text, re.I))
|
||||
|
||||
if badTriggersOcc:
|
||||
self._log.debug(u'Bad triggers detected: {0}', badTriggersOcc)
|
||||
return len(badTriggersOcc) < 2
|
||||
if bad_triggers_occ:
|
||||
self._log.debug(u'Bad triggers detected: {0}', bad_triggers_occ)
|
||||
return len(bad_triggers_occ) < 2
|
||||
|
||||
def slugify(self, text):
|
||||
"""Normalize a string and remove non-alphanumeric characters.
|
||||
|
|
@ -570,6 +570,9 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
self.import_stages = [self.imported]
|
||||
self.config.add({
|
||||
'auto': True,
|
||||
'bing_client_secret': None,
|
||||
'bing_lang_from': [],
|
||||
'bing_lang_to': None,
|
||||
'google_API_key': None,
|
||||
'google_engine_ID': u'009217259823014548361:lndtuqkycfu',
|
||||
'genius_api_key':
|
||||
|
|
@ -579,6 +582,7 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
'force': False,
|
||||
'sources': self.SOURCES,
|
||||
})
|
||||
self.config['bing_client_secret'].redact = True
|
||||
self.config['google_API_key'].redact = True
|
||||
self.config['google_engine_ID'].redact = True
|
||||
self.config['genius_api_key'].redact = True
|
||||
|
|
@ -592,6 +596,27 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
|
||||
self.backends = [self.SOURCE_BACKENDS[key](self.config, self._log)
|
||||
for key in self.config['sources'].as_str_seq()]
|
||||
self.config['bing_lang_from'] = [
|
||||
x.lower() for x in self.config['bing_lang_from'].as_str_seq()]
|
||||
self.bing_auth_token = None
|
||||
|
||||
def get_bing_access_token(self):
|
||||
params = {
|
||||
'client_id': 'beets',
|
||||
'client_secret': self.config['bing_client_secret'],
|
||||
'scope': 'http://api.microsofttranslator.com',
|
||||
'grant_type': 'client_credentials',
|
||||
}
|
||||
|
||||
oauth_url = 'https://datamarket.accesscontrol.windows.net/v2/OAuth2-13'
|
||||
oauth_token = json.loads(requests.post(
|
||||
oauth_url,
|
||||
data=urllib.urlencode(params)).content)
|
||||
if 'access_token' in oauth_token:
|
||||
return "Bearer " + oauth_token['access_token']
|
||||
else:
|
||||
self._log.warning(u'Could not get Bing Translate API access token.'
|
||||
u' Check your "bing_client_secret" password')
|
||||
|
||||
def commands(self):
|
||||
cmd = ui.Subcommand('lyrics', help='fetch song lyrics')
|
||||
|
|
@ -647,6 +672,16 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
|
||||
if lyrics:
|
||||
self._log.info(u'fetched lyrics: {0}', item)
|
||||
if self.config['bing_client_secret'].get():
|
||||
from langdetect import detect
|
||||
|
||||
lang_from = detect(lyrics)
|
||||
if self.config['bing_lang_to'].get() != lang_from and (
|
||||
not self.config['bing_lang_from'] or (
|
||||
lang_from in self.config[
|
||||
'bing_lang_from'].as_str_seq())):
|
||||
lyrics = self.append_translation(
|
||||
lyrics, self.config['bing_lang_to'])
|
||||
else:
|
||||
self._log.info(u'lyrics not found: {0}', item)
|
||||
fallback = self.config['fallback'].get()
|
||||
|
|
@ -654,11 +689,10 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
lyrics = fallback
|
||||
else:
|
||||
return
|
||||
|
||||
item.lyrics = lyrics
|
||||
|
||||
if write:
|
||||
item.try_write()
|
||||
print(lyrics)
|
||||
item.store()
|
||||
|
||||
def get_lyrics(self, artist, title):
|
||||
|
|
@ -671,3 +705,30 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
self._log.debug(u'got lyrics from backend: {0}',
|
||||
backend.__class__.__name__)
|
||||
return _scrape_strip_cruft(lyrics, True)
|
||||
|
||||
def append_translation(self, text, to_lang):
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
if not self.bing_auth_token:
|
||||
self.bing_auth_token = self.get_bing_access_token()
|
||||
if self.bing_auth_token:
|
||||
# Extract unique lines to limit API request size per song
|
||||
text_lines = set(text.split('\n'))
|
||||
url = ('http://api.microsofttranslator.com/v2/Http.svc/'
|
||||
'Translate?text=%s&to=%s' % ('|'.join(text_lines), to_lang))
|
||||
r = requests.get(url,
|
||||
headers={"Authorization ": self.bing_auth_token})
|
||||
if r.status_code != 200:
|
||||
self._log.debug('translation API error {}: {}', r.status_code,
|
||||
r.text)
|
||||
if 'token has expired' in r.text:
|
||||
self.bing_auth_token = None
|
||||
return self.append_translation(text, to_lang)
|
||||
return text
|
||||
lines_translated = ET.fromstring(r.text.encode('utf8')).text
|
||||
# Use a translation mapping dict to build resulting lyrics
|
||||
translations = dict(zip(text_lines, lines_translated.split('|')))
|
||||
result = ''
|
||||
for line in text.split('\n'):
|
||||
result += '%s / %s\n' % (line, translations[line])
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ New features:
|
|||
art while copying it.
|
||||
* :doc:`/plugins/importadded`: A new `preserve_write_mtimes` option
|
||||
lets you preserve mtime of files after each write.
|
||||
* :doc:`/plugins/lyrics`: The plugin can now translate the fetched lyrics to a
|
||||
configured `bing_lang_to` langage. Enabling translation require to register
|
||||
for a Microsoft Azure Marketplace free account. Thanks to :user:`Kraymer`.
|
||||
|
||||
Fixes:
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,14 @@ configuration file. The available options are:
|
|||
|
||||
- **auto**: Fetch lyrics automatically during import.
|
||||
Default: ``yes``.
|
||||
- **bing_client_secret**: Your Bing Translation application password
|
||||
(to :ref:`lyrics-translation`)
|
||||
- **bing_lang_from**: By default all lyrics with a language other than
|
||||
``bing_lang_to`` are translated. Use a list of lang codes to restrict the set
|
||||
of source languages to translate.
|
||||
Default: ``[]``
|
||||
- **bing_lang_to**: Language to translate lyrics into.
|
||||
Default: None.
|
||||
- **fallback**: By default, the file will be left unchanged when no lyrics are
|
||||
found. Use the empty string ``''`` to reset the lyrics in such a case.
|
||||
Default: None.
|
||||
|
|
@ -113,3 +121,23 @@ After that, the lyrics plugin will fall back on other declared data sources.
|
|||
|
||||
.. _pip: http://www.pip-installer.org/
|
||||
.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
||||
.. _lyrics-translation:
|
||||
|
||||
Activate On-the-Fly Translation
|
||||
-------------------------------
|
||||
|
||||
Using the Bing Translation API requires `langdetect`_, which you can install
|
||||
using `pip`_ by typing::
|
||||
|
||||
pip install langdetect
|
||||
|
||||
You also need to register for a Microsoft Azure Marketplace free account and
|
||||
to the `Microsoft Translator API`_. Follow the four steps process, specifically
|
||||
at step 3 enter `beets`` as *Client ID* and copy/paste the generated
|
||||
*Client secret*. into your ``bing_client_secret`` configuration, alongside
|
||||
``bing_lang_to`` target `language code`_.
|
||||
|
||||
.. _langdetect: https://pypi.python.org/pypi/langdetect
|
||||
.. _Microsoft Translator API: https://www.microsoft.com/en-us/translator/getstarted.aspx
|
||||
.. _language code: https://msdn.microsoft.com/en-us/library/hh456380.aspx
|
||||
|
|
|
|||
Loading…
Reference in a new issue