From 306571883ec2f20b71a2e658ccf27d124eebc512 Mon Sep 17 00:00:00 2001 From: Xavier Hocquet Date: Wed, 18 Mar 2020 22:59:55 -0600 Subject: [PATCH 1/5] Implement tekstowo lyrics provider --- beetsplug/lyrics.py | 54 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 0e797d5a3..0762cd3d1 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -436,6 +436,50 @@ class LyricsWiki(SymbolsReplaced): return lyrics +class Tekstowo(Backend): + # Fetch lyrics from Tekstowo.pl. + + BASE_URL = 'http://www.tekstowo.pl' + URL_PATTERN = BASE_URL + '/wyszukaj.html?search-title=%s&search-artist=%s' + + def fetch(self, artist, title): + url = self.build_url(title, artist) + search_results = self.fetch_url(url) + song_page_url = self.parse_search_results(search_results) + + if not song_page_url: + return None + + song_page_html = self.fetch_url(song_page_url) + return self.extract_lyrics(song_page_html) + + def parse_search_results(self, html): + if not HAS_BEAUTIFUL_SOUP: + return None + + html = _scrape_strip_cruft(html) + html = _scrape_merge_paragraphs(html) + + try: + html = BeautifulSoup(html, "html.parser") + except HTMLParseError: + return None + + href = html.find("div", class_="content").find_all("div", class_="box-przeboje")[0].find('a').get('href') + return self.BASE_URL + href + + def extract_lyrics(self, html): + html = _scrape_strip_cruft(html) + html = _scrape_merge_paragraphs(html) + + try: + html = BeautifulSoup(html, "html.parser") + except HTMLParseError: + return None + + return html.find("div", class_="song-text").get_text() + + def remove_credits(text): """Remove first/last line of text if it contains the word 'lyrics' eg 'Lyrics by songsdatabase.com' @@ -627,12 +671,13 @@ class Google(Backend): class LyricsPlugin(plugins.BeetsPlugin): - SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius'] + SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius', 'tekstowo'] SOURCE_BACKENDS = { 'google': Google, 'lyricwiki': LyricsWiki, 'musixmatch': MusiXmatch, 'genius': Genius, + 'tekstowo': Tekstowo, } def __init__(self): @@ -693,6 +738,13 @@ class LyricsPlugin(plugins.BeetsPlugin): ) sources.remove('genius') + if 'tekstowo' in sources and not HAS_BEAUTIFUL_SOUP: + self._log.debug( + u'The Tekstowo.pl backend requires BeautifulSoup, which is not ' + u'installed, so the source is disabled.' + ) + sources.remove('tekstowo') + self.config['bing_lang_from'] = [ x.lower() for x in self.config['bing_lang_from'].as_str_seq()] self.bing_auth_token = None From 1266a04998b1273402655067aecc8fe9d35531dd Mon Sep 17 00:00:00 2001 From: Xavier Hocquet Date: Thu, 19 Mar 2020 21:10:52 -0600 Subject: [PATCH 2/5] Docs, lint, and cleanup beautifulsoup source check --- beetsplug/lyrics.py | 42 +++++++++++++++++++++-------------------- docs/changelog.rst | 6 ++++-- docs/plugins/lyrics.rst | 18 ++++++++++++++---- test/test_lyrics.py | 2 ++ 4 files changed, 42 insertions(+), 26 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 0762cd3d1..91fc157a5 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -465,7 +465,13 @@ class Tekstowo(Backend): except HTMLParseError: return None - href = html.find("div", class_="content").find_all("div", class_="box-przeboje")[0].find('a').get('href') + song_row = html.find("div", class_="content"). \ + find_all("div", class_="box-przeboje")[0] + + if not song_row: + return None + + href = song_row.find('a').get('href') return self.BASE_URL + href def extract_lyrics(self, html): @@ -672,6 +678,7 @@ class Google(Backend): class LyricsPlugin(plugins.BeetsPlugin): SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius', 'tekstowo'] + SOURCES_USING_BEAUTIFUL_SOUP = ['google', 'genius', 'tekstowo'] SOURCE_BACKENDS = { 'google': Google, 'lyricwiki': LyricsWiki, @@ -716,6 +723,9 @@ class LyricsPlugin(plugins.BeetsPlugin): sources = plugins.sanitize_choices( self.config['sources'].as_str_seq(), available_sources) + if not HAS_BEAUTIFUL_SOUP: + sources = self.sanitize_beautiful_soup_sources(sources) + if 'google' in sources: if not self.config['google_API_key'].get(): # We log a *debug* message here because the default @@ -725,25 +735,6 @@ class LyricsPlugin(plugins.BeetsPlugin): self._log.debug(u'Disabling google source: ' u'no API key configured.') sources.remove('google') - elif not HAS_BEAUTIFUL_SOUP: - self._log.warning(u'To use the google lyrics source, you must ' - u'install the beautifulsoup4 module. See ' - u'the documentation for further details.') - sources.remove('google') - - if 'genius' in sources and not HAS_BEAUTIFUL_SOUP: - self._log.debug( - u'The Genius backend requires BeautifulSoup, which is not ' - u'installed, so the source is disabled.' - ) - sources.remove('genius') - - if 'tekstowo' in sources and not HAS_BEAUTIFUL_SOUP: - self._log.debug( - u'The Tekstowo.pl backend requires BeautifulSoup, which is not ' - u'installed, so the source is disabled.' - ) - sources.remove('tekstowo') self.config['bing_lang_from'] = [ x.lower() for x in self.config['bing_lang_from'].as_str_seq()] @@ -757,6 +748,17 @@ class LyricsPlugin(plugins.BeetsPlugin): self.backends = [self.SOURCE_BACKENDS[source](self.config, self._log) for source in sources] + def sanitize_beautiful_soup_sources(self, sources): + for source in self.SOURCES_USING_BEAUTIFUL_SOUP: + if source in sources: + self._log.warning(u'To use the %s lyrics source, you must ' + u'install the beautifulsoup4 module. See ' + u'the documentation for further details.' + % source) + sources.remove(source) + + return sources + def get_bing_access_token(self): params = { 'client_id': 'beets', diff --git a/docs/changelog.rst b/docs/changelog.rst index a51c85cad..981b2f89a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -19,7 +19,7 @@ New features: * :doc:`plugins/discogs` now adds two extra fields: `discogs_labelid` and `discogs_artistid` :bug: `3413` -* :doc:`/plugins/export`: Added new ``-f`` (``--format``) flag; +* :doc:`/plugins/export`: Added new ``-f`` (``--format``) flag; which allows for the ability to export in json, csv and xml. Thanks to :user:`austinmm`. :bug:`3402` @@ -114,7 +114,7 @@ New features: Fixes: -* :doc:`/plugins/fetchart`: Fixed a bug that caused fetchart to not take +* :doc:`/plugins/fetchart`: Fixed a bug that caused fetchart to not take environment variables such as proxy servers into account when making requests :bug:`3450` * :doc:`/plugins/inline`: In function-style field definitions that refer to @@ -174,6 +174,8 @@ Fixes: * Removed ``@classmethod`` decorator from dbcore.query.NoneQuery.match method failing with AttributeError when called. It is now an instance method. :bug:`3516` :bug:`3517` +* :doc:`/plugins/lyrics`: Added Tekstowo.pl lyrics provider + :bug:`3344` For plugin developers: diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index fac07ad87..72a664319 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -3,11 +3,12 @@ Lyrics Plugin The ``lyrics`` plugin fetches and stores song lyrics from databases on the Web. Namely, the current version of the plugin uses `Lyric Wiki`_, -`Musixmatch`_, `Genius.com`_, and, optionally, the Google custom search API. +`Musixmatch`_, `Genius.com`_, `Tekstowo.pl`_, and, optionally, the Google custom search API. .. _Lyric Wiki: https://lyrics.wikia.com/ .. _Musixmatch: https://www.musixmatch.com/ .. _Genius.com: https://genius.com/ +.. _Tekstowo.pl: https://www.tekstowo.pl/ Fetch Lyrics During Import @@ -59,11 +60,10 @@ configuration file. The available options are: sources known to be scrapeable. - **sources**: List of sources to search for lyrics. An asterisk ``*`` expands to all available sources. - Default: ``google lyricwiki musixmatch genius``, i.e., all the + Default: ``google lyricwiki musixmatch genius tekstowo``, i.e., all the available sources. The ``google`` source will be automatically deactivated if no ``google_API_key`` is setup. - Both it and the ``genius`` source will only be enabled if BeautifulSoup is - installed. + The following sources will only be enabled if BeatifulSoup is installed: ``[google, genius, tekstowo]`` Here's an example of ``config.yaml``:: @@ -166,6 +166,16 @@ library. Install it by typing:: The backend is enabled by default. +Activate Tekstowo.pl Lyrics +---------------------- + +Like the Google backend, the Tekstowo.pl backend requires the `BeautifulSoup`_ +library. Install it by typing:: + + pip install beautifulsoup4 + +The backend is enabled by default. + .. _lyrics-translation: Activate On-the-Fly Translation diff --git a/test/test_lyrics.py b/test/test_lyrics.py index f7ea538e2..9a5e9e893 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -251,6 +251,8 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest): dict(artist=u'Santana', title=u'Black magic woman', backend=lyrics.MusiXmatch), dict(DEFAULT_SONG, backend=lyrics.Genius), + dict(artist=u'Boy In Space', title=u'u n eye', + backend=lyrics.Tekstowo), ] GOOGLE_SOURCES = [ From 58bfe4567e7cf4f026f2c20a7b61b07699d12756 Mon Sep 17 00:00:00 2001 From: Xavier Hocquet Date: Sat, 28 Mar 2020 11:34:03 -0600 Subject: [PATCH 3/5] Code review --- beetsplug/lyrics.py | 8 ++++---- docs/plugins/lyrics.rst | 25 +++++-------------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 91fc157a5..b22525978 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -678,7 +678,7 @@ class Google(Backend): class LyricsPlugin(plugins.BeetsPlugin): SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius', 'tekstowo'] - SOURCES_USING_BEAUTIFUL_SOUP = ['google', 'genius', 'tekstowo'] + BS_SOURCES = ['google', 'genius', 'tekstowo'] SOURCE_BACKENDS = { 'google': Google, 'lyricwiki': LyricsWiki, @@ -724,7 +724,7 @@ class LyricsPlugin(plugins.BeetsPlugin): self.config['sources'].as_str_seq(), available_sources) if not HAS_BEAUTIFUL_SOUP: - sources = self.sanitize_beautiful_soup_sources(sources) + sources = self.sanitize_bs_sources(sources) if 'google' in sources: if not self.config['google_API_key'].get(): @@ -748,8 +748,8 @@ class LyricsPlugin(plugins.BeetsPlugin): self.backends = [self.SOURCE_BACKENDS[source](self.config, self._log) for source in sources] - def sanitize_beautiful_soup_sources(self, sources): - for source in self.SOURCES_USING_BEAUTIFUL_SOUP: + def sanitize_bs_sources(self, sources): + for source in self.BS_SOURCES: if source in sources: self._log.warning(u'To use the %s lyrics source, you must ' u'install the beautifulsoup4 module. See ' diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 72a664319..820af89cd 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -129,11 +129,6 @@ few suggestions. Activate Google Custom Search ------------------------------ -Using the Google backend requires `BeautifulSoup`_, which you can install -using `pip`_ by typing:: - - pip install beautifulsoup4 - You also need to `register for a Google API key`_. Set the ``google_API_key`` configuration option to your key. Then add ``google`` to the list of sources in your configuration (or use @@ -156,25 +151,15 @@ After that, the lyrics plugin will fall back on other declared data sources. .. _pip: https://pip.pypa.io .. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/ -Activate Genius Lyrics ----------------------- +Installing BeautifulSoup for Google, Genius, or Tekstowo.pl Backends +-------------------------------------------------------------------- -Like the Google backend, the Genius backend requires the `BeautifulSoup`_ -library. Install it by typing:: +Using the Google, Genius, or Tekstowo.pl backends requires `BeautifulSoup`_, +which you can install using `pip`_ by typing:: pip install beautifulsoup4 -The backend is enabled by default. - -Activate Tekstowo.pl Lyrics ----------------------- - -Like the Google backend, the Tekstowo.pl backend requires the `BeautifulSoup`_ -library. Install it by typing:: - - pip install beautifulsoup4 - -The backend is enabled by default. +These backends are enabled by default. .. _lyrics-translation: From 38ea35aa5e59011179a10bf11dff0f1d44f8f84f Mon Sep 17 00:00:00 2001 From: Xavier Hocquet Date: Sat, 28 Mar 2020 11:44:56 -0600 Subject: [PATCH 4/5] Lint --- beetsplug/lyrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index b22525978..1d92b24e5 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -469,7 +469,7 @@ class Tekstowo(Backend): find_all("div", class_="box-przeboje")[0] if not song_row: - return None + return None href = song_row.find('a').get('href') return self.BASE_URL + href @@ -752,9 +752,9 @@ class LyricsPlugin(plugins.BeetsPlugin): for source in self.BS_SOURCES: if source in sources: self._log.warning(u'To use the %s lyrics source, you must ' - u'install the beautifulsoup4 module. See ' - u'the documentation for further details.' - % source) + u'install the beautifulsoup4 module. See ' + u'the documentation for further details.' + % source) sources.remove(source) return sources From 6081e6a8dff95edbf636f105d2548473ff12e95b Mon Sep 17 00:00:00 2001 From: Xavier Hocquet Date: Sun, 28 Mar 2021 13:08:00 -0500 Subject: [PATCH 5/5] CR comments --- beetsplug/lyrics.py | 8 ++++---- docs/plugins/lyrics.rst | 14 ++++++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 182e753f6..08efd38ff 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -716,10 +716,10 @@ class LyricsPlugin(plugins.BeetsPlugin): def sanitize_bs_sources(self, sources): for source in self.BS_SOURCES: if source in sources: - self._log.warning(u'To use the %s lyrics source, you must ' - u'install the beautifulsoup4 module. See ' - u'the documentation for further details.' - % source) + self._log.debug(u'To use the %s lyrics source, you must ' + u'install the beautifulsoup4 module. See ' + u'the documentation for further details.' + % source) sources.remove(source) return sources diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index c8e12b526..f05ef6357 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -62,7 +62,8 @@ configuration file. The available options are: Default: ``google musixmatch genius tekstowo``, i.e., all the available sources. The ``google`` source will be automatically deactivated if no ``google_API_key`` is setup. - The following sources will only be enabled if BeatifulSoup is installed: ``[google, genius, tekstowo]`` + The ``google``, ``genius``, and ``tekstowo`` sources will only be enabled if + BeautifulSoup is installed. Here's an example of ``config.yaml``:: @@ -128,6 +129,11 @@ few suggestions. Activate Google Custom Search ------------------------------ +Using the Google backend requires `BeautifulSoup`_, which you can install +using `pip`_ by typing:: + + pip install beautifulsoup4 + You also need to `register for a Google API key`_. Set the ``google_API_key`` configuration option to your key. Then add ``google`` to the list of sources in your configuration (or use @@ -150,11 +156,11 @@ After that, the lyrics plugin will fall back on other declared data sources. .. _pip: https://pip.pypa.io .. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/ -Installing BeautifulSoup for Google, Genius, or Tekstowo.pl Backends +Activate Genius and Tekstowo.pl Lyrics -------------------------------------------------------------------- -Using the Google, Genius, or Tekstowo.pl backends requires `BeautifulSoup`_, -which you can install using `pip`_ by typing:: +Using the Genius or Tekstowo.pl backends requires `BeautifulSoup`_, which +you can install using `pip`_ by typing:: pip install beautifulsoup4