From 3f896ab28117cd9032a0d9fcc8fc5c871f954324 Mon Sep 17 00:00:00 2001 From: ybnd Date: Mon, 10 Jan 2022 19:03:36 +0100 Subject: [PATCH 1/3] Make Tekstowo scraper more specific --- beetsplug/lyrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 7d026def1..0856ebb34 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -488,11 +488,11 @@ class Tekstowo(Backend): if not soup: return None - lyrics_div = soup.find("div", class_="song-text") + lyrics_div = soup.select("div.song-text > div.inner-text") if not lyrics_div: return None - return lyrics_div.get_text() + return lyrics_div[0].get_text() def remove_credits(text): From 3a8520e30ab9ec6c435cacfa1d1508421ca2ecbe Mon Sep 17 00:00:00 2001 From: ybnd Date: Mon, 10 Jan 2022 19:07:59 +0100 Subject: [PATCH 2/3] Add changelog entry --- docs/changelog.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index d13dcdd4a..e4a10dc9c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -35,6 +35,8 @@ Bug fixes: * :doc:`plugins/web`: Fix handling of "query" requests. Previously queries consisting of more than one token (separated by a slash) always returned an empty result. +* :doc:`plugins/lyrics`: Fixed an issue with the Tekstowo.pl scraper where some + non-lyrics content got included in the lyrics For packagers: From 414760282b9ec374a26488a89da230f545f35b52 Mon Sep 17 00:00:00 2001 From: ybnd Date: Mon, 10 Jan 2022 22:07:58 +0100 Subject: [PATCH 3/3] Remove footer text from Genius lyrics --- beetsplug/lyrics.py | 6 ++++++ docs/changelog.rst | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 0856ebb34..1f215df45 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -419,11 +419,17 @@ class Genius(Backend): lyrics_div = verse_div.parent for br in lyrics_div.find_all("br"): br.replace_with("\n") + ads = lyrics_div.find_all("div", class_=re.compile("InreadAd__Container")) for ad in ads: ad.replace_with("\n") + footers = lyrics_div.find_all("div", + class_=re.compile("Lyrics__Footer")) + for footer in footers: + footer.replace_with("") + return lyrics_div.get_text() diff --git a/docs/changelog.rst b/docs/changelog.rst index e4a10dc9c..715853b66 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -35,8 +35,8 @@ Bug fixes: * :doc:`plugins/web`: Fix handling of "query" requests. Previously queries consisting of more than one token (separated by a slash) always returned an empty result. -* :doc:`plugins/lyrics`: Fixed an issue with the Tekstowo.pl scraper where some - non-lyrics content got included in the lyrics +* :doc:`plugins/lyrics`: Fixed issues with the Tekstowo.pl and Genius + backends where some non-lyrics content got included in the lyrics For packagers: