Implement tekstowo lyrics provider

This commit is contained in:
Xavier Hocquet 2020-03-18 22:59:55 -06:00
parent b34d1f71a9
commit 306571883e

View file

@ -436,6 +436,50 @@ class LyricsWiki(SymbolsReplaced):
return lyrics return lyrics
class Tekstowo(Backend):
    """Fetch lyrics from Tekstowo.pl.

    The lookup is a two-step scrape: run a search for the song, follow the
    first search hit, then pull the lyrics text out of the song page.
    Every step returns ``None`` instead of raising when the expected markup
    is missing, so an unknown song or a changed page layout simply yields
    "no lyrics" for this source.
    """

    BASE_URL = 'http://www.tekstowo.pl'
    URL_PATTERN = BASE_URL + '/wyszukaj.html?search-title=%s&search-artist=%s'

    def fetch(self, artist, title):
        """Return the lyrics for ``title`` by ``artist``, or ``None``.

        ``None`` is returned when either page cannot be downloaded, when
        the search has no usable result, or when the song page holds no
        lyrics block.
        """
        # NOTE: the arguments are passed title-first to mirror the
        # placeholder order in URL_PATTERN. This assumes build_url (defined
        # outside this class) fills the placeholders in argument order.
        url = self.build_url(title, artist)
        search_results = self.fetch_url(url)
        if not search_results:
            # Download failed or came back empty; nothing to parse.
            return None

        song_page_url = self.parse_search_results(search_results)
        if not song_page_url:
            return None

        song_page_html = self.fetch_url(song_page_url)
        if not song_page_html:
            return None

        return self.extract_lyrics(song_page_html)

    def _parse_html(self, html):
        """Clean up raw ``html`` and parse it into a BeautifulSoup tree.

        Returns ``None`` when BeautifulSoup is unavailable, when ``html``
        is empty, or when the parser rejects the document.
        """
        if not HAS_BEAUTIFUL_SOUP or not html:
            return None

        html = _scrape_strip_cruft(html)
        html = _scrape_merge_paragraphs(html)
        try:
            return BeautifulSoup(html, "html.parser")
        except HTMLParseError:
            return None

    def parse_search_results(self, html):
        """Return the absolute URL of the first search hit, or ``None``.

        ``html`` is the raw search results page. The first link inside the
        first ``box-przeboje`` row of the ``content`` container is taken as
        the best match.
        """
        soup = self._parse_html(html)
        if soup is None:
            return None

        # Walk the markup one level at a time: any of these lookups comes
        # back empty when the search has no hits or the layout changes.
        content_div = soup.find("div", class_="content")
        if content_div is None:
            return None

        song_rows = content_div.find_all("div", class_="box-przeboje")
        if not song_rows:
            return None

        link = song_rows[0].find('a')
        if link is None:
            return None

        href = link.get('href')
        if not href:
            return None

        return self.BASE_URL + href

    def extract_lyrics(self, html):
        """Return the text of the ``song-text`` block in ``html``, or ``None``.

        ``html`` is the raw song page. ``None`` is returned when the page
        cannot be parsed or contains no lyrics block.
        """
        soup = self._parse_html(html)
        if soup is None:
            return None

        lyrics_div = soup.find("div", class_="song-text")
        if lyrics_div is None:
            return None

        return lyrics_div.get_text()
def remove_credits(text): def remove_credits(text):
"""Remove first/last line of text if it contains the word 'lyrics' """Remove first/last line of text if it contains the word 'lyrics'
eg 'Lyrics by songsdatabase.com' eg 'Lyrics by songsdatabase.com'
@ -627,12 +671,13 @@ class Google(Backend):
class LyricsPlugin(plugins.BeetsPlugin): class LyricsPlugin(plugins.BeetsPlugin):
SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius'] SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius', 'tekstowo']
SOURCE_BACKENDS = { SOURCE_BACKENDS = {
'google': Google, 'google': Google,
'lyricwiki': LyricsWiki, 'lyricwiki': LyricsWiki,
'musixmatch': MusiXmatch, 'musixmatch': MusiXmatch,
'genius': Genius, 'genius': Genius,
'tekstowo': Tekstowo,
} }
def __init__(self): def __init__(self):
@ -693,6 +738,13 @@ class LyricsPlugin(plugins.BeetsPlugin):
) )
sources.remove('genius') sources.remove('genius')
if 'tekstowo' in sources and not HAS_BEAUTIFUL_SOUP:
self._log.debug(
u'The Tekstowo.pl backend requires BeautifulSoup, which is not '
u'installed, so the source is disabled.'
)
sources.remove('tekstowo')
self.config['bing_lang_from'] = [ self.config['bing_lang_from'] = [
x.lower() for x in self.config['bing_lang_from'].as_str_seq()] x.lower() for x in self.config['bing_lang_from'].as_str_seq()]
self.bing_auth_token = None self.bing_auth_token = None