This commit is contained in:
Fabrice Laporte 2017-05-02 07:30:40 +02:00
parent fa9262d61b
commit 3e3ad6974c
2 changed files with 16 additions and 8 deletions

View file

@@ -84,6 +84,7 @@ def unichar(i):
except ValueError:
return struct.pack('i', i).decode('utf-32')
def unescape(text):
"""Resolve &#xxx; HTML entities (and some others)."""
if isinstance(text, bytes):
@@ -110,7 +111,6 @@ def extract_text_in(html, starttag):
"""Extract the text from a <DIV> tag in the HTML starting with
``starttag``. Returns None if parsing fails.
"""
# Strip off the leading text before opening tag.
try:
_, html = html.split(starttag, 1)
@@ -151,10 +151,10 @@ def search_pairs(item):
and featured artists from the strings and add them as candidates.
The method also tries to split multiple titles separated with `/`.
"""
def generate_alternatives(string, patterns):
"""Generate string alternatives by extracting first matching group for
-each given pattern."""
+each given pattern.
+"""
alternatives = [string]
for pattern in patterns:
match = re.search(pattern, string, re.IGNORECASE)
@@ -270,6 +270,7 @@ class MusiXmatch(SymbolsReplaced):
class Genius(Backend):
"""Fetch lyrics from Genius via genius-api."""
def __init__(self, config, log):
super(Genius, self).__init__(config, log)
self.api_key = config['genius_api_key'].as_str()
@@ -361,6 +362,7 @@ class Genius(Backend):
class LyricsWiki(SymbolsReplaced):
"""Fetch lyrics from LyricsWiki."""
URL_PATTERN = 'http://lyrics.wikia.com/%s:%s'
def fetch(self, artist, title):
@@ -381,6 +383,7 @@ class LyricsWiki(SymbolsReplaced):
class LyricsCom(Backend):
"""Fetch lyrics from Lyrics.com."""
URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html'
NOT_FOUND = (
'Sorry, we do not have the lyric',
@@ -484,6 +487,7 @@ def scrape_lyrics_from_html(html):
class Google(Backend):
"""Fetch lyrics from Google search results."""
def __init__(self, config, log):
super(Google, self).__init__(config, log)
self.api_key = config['google_API_key'].as_str()
@@ -719,7 +723,8 @@ class LyricsPlugin(plugins.BeetsPlugin):
def fetch_item_lyrics(self, lib, item, write, force):
"""Fetch and store lyrics for a single item. If ``write``, then the
-lyrics will also be written to the file itself."""
+lyrics will also be written to the file itself.
+"""
# Skip if the item already has lyrics.
if not force and item.lyrics:
self._log.info(u'lyrics already present: {0}', item)

View file

@@ -311,7 +311,8 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest):
'lyrics sources testing not enabled')
def test_google_sources_ok(self):
"""Test if lyrics present on websites registered in beets google custom
-search engine are correctly scraped."""
+search engine are correctly scraped.
+"""
for s in self.GOOGLE_SOURCES:
url = s['url'] + s['path']
res = lyrics.scrape_lyrics_from_html(
@@ -323,6 +324,7 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
"""Test scraping heuristics on a fake html page.
"""
source = dict(url=u'http://www.example.com', artist=u'John Doe',
title=u'Beets song', path=u'/lyrics/beetssong')
@@ -330,7 +332,6 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
"""Set up configuration"""
LyricsGoogleBaseTest.setUp(self)
self.plugin = lyrics.LyricsPlugin()
@patch.object(lyrics.Backend, 'fetch_url', MockFetchUrl())
def test_mocked_source_ok(self):
@@ -344,7 +345,8 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
@patch.object(lyrics.Backend, 'fetch_url', MockFetchUrl())
def test_is_page_candidate_exact_match(self):
"""Test matching html page title with song infos -- when song infos are
-present in the title."""
+present in the title.
+"""
from bs4 import SoupStrainer, BeautifulSoup
s = self.source
url = six.text_type(s['url'] + s['path'])
@@ -356,7 +358,8 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
def test_is_page_candidate_fuzzy_match(self):
"""Test matching html page title with song infos -- when song infos are
-not present in the title."""
+not present in the title.
+"""
s = self.source
url = s['url'] + s['path']
url_title = u'example.com | Beats song by John doe'