mirror of
https://github.com/beetbox/beets.git
synced 2025-12-23 09:03:49 +01:00
Merge pull request #3519 from xhocquet/tekstowo-lyrics
Implement tekstowo lyrics provider
This commit is contained in:
commit
1ead968a27
4 changed files with 81 additions and 22 deletions
|
|
@ -401,6 +401,56 @@ class Genius(Backend):
|
|||
return lyrics_div.get_text()
|
||||
|
||||
|
||||
class Tekstowo(Backend):
    """Fetch lyrics from Tekstowo.pl.

    The site is queried in two steps: a search request built from the
    title and artist, followed by a request for the first song page
    listed in the search results. Both pages are scraped with
    BeautifulSoup, so every method returns None when the expected markup
    is missing instead of raising.
    """

    BASE_URL = 'http://www.tekstowo.pl'
    URL_PATTERN = BASE_URL + '/wyszukaj.html?search-title=%s&search-artist=%s'

    def fetch(self, artist, title):
        """Return the lyrics for `title` by `artist`, or None if not found.
        """
        # URL_PATTERN takes the title first and the artist second.
        url = self.build_url(title, artist)
        search_results = self.fetch_url(url)
        # NOTE(review): fetch_url is defined on Backend, outside this
        # view; a falsy result is treated as "request failed" here so
        # that it never reaches the HTML scrapers -- confirm.
        if not search_results:
            return None

        song_page_url = self.parse_search_results(search_results)
        if not song_page_url:
            return None

        song_page_html = self.fetch_url(song_page_url)
        if not song_page_html:
            return None

        return self.extract_lyrics(song_page_html)

    def parse_search_results(self, html):
        """Return the absolute URL of the first song in a search results
        page, or None when BeautifulSoup is unavailable, the page cannot
        be parsed, or it contains no usable result.
        """
        if not HAS_BEAUTIFUL_SOUP:
            return None

        html = _scrape_strip_cruft(html)
        html = _scrape_merge_paragraphs(html)

        try:
            soup = BeautifulSoup(html, "html.parser")
        except HTMLParseError:
            return None

        # Each lookup may come back empty (e.g. a search with no hits or
        # a changed page layout), so check every step instead of
        # chaining the calls and indexing blindly.
        content_div = soup.find("div", class_="content")
        if content_div is None:
            return None

        # `find` yields the first match, exactly what `find_all(...)[0]`
        # would, but returns None rather than raising IndexError when
        # there are no matches.
        song_row = content_div.find("div", class_="box-przeboje")
        if song_row is None:
            return None

        link = song_row.find('a')
        if link is None:
            return None

        href = link.get('href')
        if not href:
            return None

        return self.BASE_URL + href

    def extract_lyrics(self, html):
        """Return the text of the lyrics container in a song page, or
        None when BeautifulSoup is unavailable, the page cannot be
        parsed, or the container is missing.
        """
        if not HAS_BEAUTIFUL_SOUP:
            return None

        html = _scrape_strip_cruft(html)
        html = _scrape_merge_paragraphs(html)

        try:
            soup = BeautifulSoup(html, "html.parser")
        except HTMLParseError:
            return None

        lyrics_div = soup.find("div", class_="song-text")
        if lyrics_div is None:
            return None

        return lyrics_div.get_text()
|
||||
|
||||
|
||||
def remove_credits(text):
|
||||
"""Remove first/last line of text if it contains the word 'lyrics'
|
||||
eg 'Lyrics by songsdatabase.com'
|
||||
|
|
@ -593,11 +643,13 @@ class Google(Backend):
|
|||
|
||||
|
||||
class LyricsPlugin(plugins.BeetsPlugin):
|
||||
SOURCES = ['google', 'musixmatch', 'genius']
|
||||
SOURCES = ['google', 'musixmatch', 'genius', 'tekstowo']
|
||||
BS_SOURCES = ['google', 'genius', 'tekstowo']
|
||||
SOURCE_BACKENDS = {
|
||||
'google': Google,
|
||||
'musixmatch': MusiXmatch,
|
||||
'genius': Genius,
|
||||
'tekstowo': Tekstowo,
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
|
|
@ -636,6 +688,9 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
sources = plugins.sanitize_choices(
|
||||
self.config['sources'].as_str_seq(), available_sources)
|
||||
|
||||
if not HAS_BEAUTIFUL_SOUP:
|
||||
sources = self.sanitize_bs_sources(sources)
|
||||
|
||||
if 'google' in sources:
|
||||
if not self.config['google_API_key'].get():
|
||||
# We log a *debug* message here because the default
|
||||
|
|
@ -645,18 +700,6 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
self._log.debug(u'Disabling google source: '
|
||||
u'no API key configured.')
|
||||
sources.remove('google')
|
||||
elif not HAS_BEAUTIFUL_SOUP:
|
||||
self._log.warning(u'To use the google lyrics source, you must '
|
||||
u'install the beautifulsoup4 module. See '
|
||||
u'the documentation for further details.')
|
||||
sources.remove('google')
|
||||
|
||||
if 'genius' in sources and not HAS_BEAUTIFUL_SOUP:
|
||||
self._log.debug(
|
||||
u'The Genius backend requires BeautifulSoup, which is not '
|
||||
u'installed, so the source is disabled.'
|
||||
)
|
||||
sources.remove('genius')
|
||||
|
||||
self.config['bing_lang_from'] = [
|
||||
x.lower() for x in self.config['bing_lang_from'].as_str_seq()]
|
||||
|
|
@ -670,6 +713,17 @@ class LyricsPlugin(plugins.BeetsPlugin):
|
|||
self.backends = [self.SOURCE_BACKENDS[source](self.config, self._log)
|
||||
for source in sources]
|
||||
|
||||
def sanitize_bs_sources(self, sources):
    """Strip every source listed in `BS_SOURCES` out of `sources`.

    A debug message is logged for each source that gets dropped. The
    list is modified in place and the same list object is returned.
    """
    template = (u'To use the %s lyrics source, you must '
                u'install the beautifulsoup4 module. See '
                u'the documentation for further details.')

    for bs_source in self.BS_SOURCES:
        if bs_source not in sources:
            continue
        self._log.debug(template % bs_source)
        sources.remove(bs_source)

    return sources
|
||||
|
||||
def get_bing_access_token(self):
|
||||
params = {
|
||||
'client_id': 'beets',
|
||||
|
|
|
|||
|
|
@ -288,6 +288,8 @@ Fixes:
|
|||
* Removed ``@classmethod`` decorator from dbcore.query.NoneQuery.match method
|
||||
failing with AttributeError when called. It is now an instance method.
|
||||
:bug:`3516` :bug:`3517`
|
||||
* :doc:`/plugins/lyrics`: Added Tekstowo.pl lyrics provider.
|
||||
:bug:`3344`
|
||||
* :doc:`/plugins/lyrics`: Tolerate missing lyrics div in Genius scraper.
|
||||
Thanks to :user:`thejli21`.
|
||||
:bug:`3535` :bug:`3554`
|
||||
|
|
|
|||
|
|
@ -3,10 +3,11 @@ Lyrics Plugin
|
|||
|
||||
The ``lyrics`` plugin fetches and stores song lyrics from databases on the Web.
|
||||
Namely, the current version of the plugin uses `Musixmatch`_, `Genius.com`_,
|
||||
and, optionally, the Google custom search API.
|
||||
`Tekstowo.pl`_, and, optionally, the Google custom search API.
|
||||
|
||||
.. _Musixmatch: https://www.musixmatch.com/
|
||||
.. _Genius.com: https://genius.com/
|
||||
.. _Tekstowo.pl: https://www.tekstowo.pl/
|
||||
|
||||
|
||||
Fetch Lyrics During Import
|
||||
|
|
@ -58,11 +59,11 @@ configuration file. The available options are:
|
|||
sources known to be scrapeable.
|
||||
- **sources**: List of sources to search for lyrics. An asterisk ``*`` expands
|
||||
to all available sources.
|
||||
Default: ``google musixmatch genius``, i.e., all the
|
||||
Default: ``google musixmatch genius tekstowo``, i.e., all the
|
||||
available sources. The ``google`` source will be automatically
|
||||
deactivated if no ``google_API_key`` is set up.
|
||||
Both it and the ``genius`` source will only be enabled if BeautifulSoup is
|
||||
installed.
|
||||
The ``google``, ``genius``, and ``tekstowo`` sources will only be enabled if
|
||||
BeautifulSoup is installed.
|
||||
|
||||
Here's an example of ``config.yaml``::
|
||||
|
||||
|
|
@ -155,15 +156,15 @@ After that, the lyrics plugin will fall back on other declared data sources.
|
|||
.. _pip: https://pip.pypa.io
|
||||
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
||||
Activate Genius Lyrics
|
||||
----------------------
|
||||
Activate Genius and Tekstowo.pl Lyrics
|
||||
--------------------------------------
|
||||
|
||||
Like the Google backend, the Genius backend requires the `BeautifulSoup`_
|
||||
library. Install it by typing::
|
||||
Using the Genius or Tekstowo.pl backends requires `BeautifulSoup`_, which
|
||||
you can install using `pip`_ by typing::
|
||||
|
||||
pip install beautifulsoup4
|
||||
|
||||
The backend is enabled by default.
|
||||
These backends are enabled by default.
|
||||
|
||||
.. _lyrics-translation:
|
||||
|
||||
|
|
|
|||
|
|
@ -274,6 +274,8 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest):
|
|||
dict(DEFAULT_SONG, backend=lyrics.Genius,
|
||||
# GitHub actions is on some form of Cloudflare blacklist.
|
||||
skip=os.environ.get('GITHUB_ACTIONS') == 'true'),
|
||||
dict(artist=u'Boy In Space', title=u'u n eye',
|
||||
backend=lyrics.Tekstowo),
|
||||
]
|
||||
|
||||
GOOGLE_SOURCES = [
|
||||
|
|
|
|||
Loading…
Reference in a new issue