diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index ad2d278b5..9a60df119 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -84,6 +84,7 @@ def unichar(i): except ValueError: return struct.pack('i', i).decode('utf-32') + def unescape(text): """Resolve &#xxx; HTML entities (and some others).""" if isinstance(text, bytes): @@ -110,7 +111,6 @@ def extract_text_in(html, starttag): """Extract the text from a
tag in the HTML starting with ``starttag``. Returns None if parsing fails. """ - # Strip off the leading text before opening tag. try: _, html = html.split(starttag, 1) @@ -151,10 +151,10 @@ def search_pairs(item): and featured artists from the strings and add them as candidates. The method also tries to split multiple titles separated with `/`. """ - def generate_alternatives(string, patterns): """Generate string alternatives by extracting first matching group for - each given pattern.""" + each given pattern. + """ alternatives = [string] for pattern in patterns: match = re.search(pattern, string, re.IGNORECASE) @@ -270,6 +270,7 @@ class MusiXmatch(SymbolsReplaced): class Genius(Backend): """Fetch lyrics from Genius via genius-api.""" + def __init__(self, config, log): super(Genius, self).__init__(config, log) self.api_key = config['genius_api_key'].as_str() @@ -361,6 +362,7 @@ class Genius(Backend): class LyricsWiki(SymbolsReplaced): """Fetch lyrics from LyricsWiki.""" + URL_PATTERN = 'http://lyrics.wikia.com/%s:%s' def fetch(self, artist, title): @@ -381,6 +383,7 @@ class LyricsWiki(SymbolsReplaced): class LyricsCom(Backend): """Fetch lyrics from Lyrics.com.""" + URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html' NOT_FOUND = ( 'Sorry, we do not have the lyric', @@ -484,6 +487,7 @@ def scrape_lyrics_from_html(html): class Google(Backend): """Fetch lyrics from Google search results.""" + def __init__(self, config, log): super(Google, self).__init__(config, log) self.api_key = config['google_API_key'].as_str() @@ -719,7 +723,8 @@ class LyricsPlugin(plugins.BeetsPlugin): def fetch_item_lyrics(self, lib, item, write, force): """Fetch and store lyrics for a single item. If ``write``, then the - lyrics will also be written to the file itself.""" + lyrics will also be written to the file itself. + """ # Skip if the item already has lyrics. if not force and item.lyrics: self._log.info(u'lyrics already present: {0}', item) diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 9ed0eb4b4..0dbf658fe 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -311,7 +311,8 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest): 'lyrics sources testing not enabled') def test_google_sources_ok(self): """Test if lyrics present on websites registered in beets google custom - search engine are correctly scraped.""" + search engine are correctly scraped. + """ for s in self.GOOGLE_SOURCES: url = s['url'] + s['path'] res = lyrics.scrape_lyrics_from_html( @@ -323,6 +324,7 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest): class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest): """Test scraping heuristics on a fake html page. """ + source = dict(url=u'http://www.example.com', artist=u'John Doe', title=u'Beets song', path=u'/lyrics/beetssong') @@ -330,7 +332,6 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest): """Set up configuration""" LyricsGoogleBaseTest.setUp(self) self.plugin = lyrics.LyricsPlugin() - @patch.object(lyrics.Backend, 'fetch_url', MockFetchUrl()) def test_mocked_source_ok(self): @@ -344,7 +345,8 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest): @patch.object(lyrics.Backend, 'fetch_url', MockFetchUrl()) def test_is_page_candidate_exact_match(self): """Test matching html page title with song infos -- when song infos are - present in the title.""" + present in the title. + """ from bs4 import SoupStrainer, BeautifulSoup s = self.source url = six.text_type(s['url'] + s['path']) @@ -356,7 +358,8 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest): def test_is_page_candidate_fuzzy_match(self): """Test matching html page title with song infos -- when song infos are - not present in the title.""" + not present in the title. + """ s = self.source url = s['url'] + s['path'] url_title = u'example.com | Beats song by John doe'