Merge pull request #3519 from xhocquet/tekstowo-lyrics

Implement tekstowo lyrics provider
This commit is contained in:
Adrian Sampson 2021-03-28 14:23:23 -04:00
commit 1ead968a27
No known key found for this signature in database
GPG key ID: BDB93AB409CC8705
4 changed files with 81 additions and 22 deletions

View file

@ -401,6 +401,56 @@ class Genius(Backend):
return lyrics_div.get_text()
class Tekstowo(Backend):
    """Fetch lyrics from Tekstowo.pl.

    The backend first queries the site's search page, follows the first
    search hit to the song page, and then extracts the lyrics text from
    that page. Every step returns ``None`` instead of raising when the
    expected markup is missing, so a layout change or an empty search
    result simply yields "no lyrics found".
    """

    BASE_URL = 'http://www.tekstowo.pl'
    URL_PATTERN = BASE_URL + '/wyszukaj.html?search-title=%s&search-artist=%s'

    def fetch(self, artist, title):
        """Return the lyrics for `title` by `artist`, or None if not found.
        """
        # URL_PATTERN lists the title placeholder before the artist one,
        # hence the swapped argument order (presumably build_url fills the
        # placeholders positionally -- it is defined outside this class).
        url = self.build_url(title, artist)
        search_results = self.fetch_url(url)
        # NOTE(review): fetch_url is assumed to return a falsy value when
        # the request fails; bail out rather than scraping nothing.
        if not search_results:
            return None

        song_page_url = self.parse_search_results(search_results)
        if not song_page_url:
            return None

        song_page_html = self.fetch_url(song_page_url)
        if not song_page_html:
            return None

        return self.extract_lyrics(song_page_html)

    def parse_search_results(self, html):
        """Return the absolute URL of the first song in the search results
        page `html`, or None if no usable result is present.
        """
        if not HAS_BEAUTIFUL_SOUP:
            return None

        html = _scrape_strip_cruft(html)
        html = _scrape_merge_paragraphs(html)

        try:
            soup = BeautifulSoup(html, "html.parser")
        except HTMLParseError:
            return None

        # Walk the expected structure one step at a time: any of these
        # lookups can come back empty when the search has no hits or the
        # site's markup changes.
        content = soup.find("div", class_="content")
        if content is None:
            return None

        song_rows = content.find_all("div", class_="box-przeboje")
        if not song_rows:
            return None

        link = song_rows[0].find('a')
        if link is None:
            return None

        href = link.get('href')
        if not href:
            return None

        return self.BASE_URL + href

    def extract_lyrics(self, html):
        """Return the lyrics text from the song page `html`, or None if
        the page has no lyrics container.
        """
        if not HAS_BEAUTIFUL_SOUP:
            return None

        html = _scrape_strip_cruft(html)
        html = _scrape_merge_paragraphs(html)

        try:
            soup = BeautifulSoup(html, "html.parser")
        except HTMLParseError:
            return None

        # Tolerate a missing lyrics div instead of raising AttributeError.
        lyrics_div = soup.find("div", class_="song-text")
        if lyrics_div is None:
            return None

        return lyrics_div.get_text()
def remove_credits(text):
"""Remove first/last line of text if it contains the word 'lyrics'
eg 'Lyrics by songsdatabase.com'
@ -593,11 +643,13 @@ class Google(Backend):
class LyricsPlugin(plugins.BeetsPlugin):
SOURCES = ['google', 'musixmatch', 'genius']
SOURCES = ['google', 'musixmatch', 'genius', 'tekstowo']
BS_SOURCES = ['google', 'genius', 'tekstowo']
SOURCE_BACKENDS = {
'google': Google,
'musixmatch': MusiXmatch,
'genius': Genius,
'tekstowo': Tekstowo,
}
def __init__(self):
@ -636,6 +688,9 @@ class LyricsPlugin(plugins.BeetsPlugin):
sources = plugins.sanitize_choices(
self.config['sources'].as_str_seq(), available_sources)
if not HAS_BEAUTIFUL_SOUP:
sources = self.sanitize_bs_sources(sources)
if 'google' in sources:
if not self.config['google_API_key'].get():
# We log a *debug* message here because the default
@ -645,18 +700,6 @@ class LyricsPlugin(plugins.BeetsPlugin):
self._log.debug(u'Disabling google source: '
u'no API key configured.')
sources.remove('google')
elif not HAS_BEAUTIFUL_SOUP:
self._log.warning(u'To use the google lyrics source, you must '
u'install the beautifulsoup4 module. See '
u'the documentation for further details.')
sources.remove('google')
if 'genius' in sources and not HAS_BEAUTIFUL_SOUP:
self._log.debug(
u'The Genius backend requires BeautifulSoup, which is not '
u'installed, so the source is disabled.'
)
sources.remove('genius')
self.config['bing_lang_from'] = [
x.lower() for x in self.config['bing_lang_from'].as_str_seq()]
@ -670,6 +713,17 @@ class LyricsPlugin(plugins.BeetsPlugin):
self.backends = [self.SOURCE_BACKENDS[source](self.config, self._log)
for source in sources]
def sanitize_bs_sources(self, sources):
    """Drop every source listed in `BS_SOURCES` from `sources`.

    A debug message naming the source is logged for each one removed.
    The list is modified in place and also returned.
    """
    message = (u'To use the %s lyrics source, you must '
               u'install the beautifulsoup4 module. See '
               u'the documentation for further details.')

    for bs_source in self.BS_SOURCES:
        if bs_source not in sources:
            continue
        self._log.debug(message % bs_source)
        sources.remove(bs_source)

    return sources
def get_bing_access_token(self):
params = {
'client_id': 'beets',

View file

@ -288,6 +288,8 @@ Fixes:
* Removed ``@classmethod`` decorator from dbcore.query.NoneQuery.match method
failing with AttributeError when called. It is now an instance method.
:bug:`3516` :bug:`3517`
* :doc:`/plugins/lyrics`: Added Tekstowo.pl lyrics provider.
:bug:`3344`
* :doc:`/plugins/lyrics`: Tolerate missing lyrics div in Genius scraper.
Thanks to :user:`thejli21`.
:bug:`3535` :bug:`3554`

View file

@ -3,10 +3,11 @@ Lyrics Plugin
The ``lyrics`` plugin fetches and stores song lyrics from databases on the Web.
Namely, the current version of the plugin uses `Musixmatch`_, `Genius.com`_,
and, optionally, the Google custom search API.
`Tekstowo.pl`_, and, optionally, the Google custom search API.
.. _Musixmatch: https://www.musixmatch.com/
.. _Genius.com: https://genius.com/
.. _Tekstowo.pl: https://www.tekstowo.pl/
Fetch Lyrics During Import
@ -58,11 +59,11 @@ configuration file. The available options are:
sources known to be scrapeable.
- **sources**: List of sources to search for lyrics. An asterisk ``*`` expands
to all available sources.
Default: ``google musixmatch genius``, i.e., all the
Default: ``google musixmatch genius tekstowo``, i.e., all the
available sources. The ``google`` source will be automatically
deactivated if no ``google_API_key`` is set up.
Both it and the ``genius`` source will only be enabled if BeautifulSoup is
installed.
The ``google``, ``genius``, and ``tekstowo`` sources will only be enabled if
BeautifulSoup is installed.
Here's an example of ``config.yaml``::
@ -155,15 +156,15 @@ After that, the lyrics plugin will fall back on other declared data sources.
.. _pip: https://pip.pypa.io
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
Activate Genius Lyrics
----------------------
Activate Genius and Tekstowo.pl Lyrics
--------------------------------------
Like the Google backend, the Genius backend requires the `BeautifulSoup`_
library. Install it by typing::
Using the Genius or Tekstowo.pl backends requires `BeautifulSoup`_, which
you can install using `pip`_ by typing::
pip install beautifulsoup4
The backend is enabled by default.
These backends are enabled by default.
.. _lyrics-translation:

View file

@ -274,6 +274,8 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest):
dict(DEFAULT_SONG, backend=lyrics.Genius,
# GitHub actions is on some form of Cloudflare blacklist.
skip=os.environ.get('GITHUB_ACTIONS') == 'true'),
dict(artist=u'Boy In Space', title=u'u n eye',
backend=lyrics.Tekstowo),
]
GOOGLE_SOURCES = [