From dd9f178fffd1cd130bde56fdbaae853aab4ca0c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?=
Date: Sat, 19 Oct 2024 03:30:41 +0100
Subject: [PATCH] Do not try to strip cruft from the parsed lyrics text.
Having removed it, I found that only the Genius lyrics changed: it had an
extra new line. Thus I defined a function 'collapse_newlines' which now
gets called for the Genius lyrics.
---
beetsplug/lyrics.py | 24 +++++++++++-------------
test/plugins/test_lyrics.py | 5 ++---
2 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 3d0e09673..2ec362356 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -59,9 +59,6 @@ except ImportError:
JSONDict = dict[str, Any]
-DIV_RE = re.compile(r"<(/?)div>?", re.I)
-COMMENT_RE = re.compile(r"", re.S)
-TAG_RE = re.compile(r"<[^>]*>")
BREAK_RE = re.compile(r"\n?\s*
]*)*>\s*\n?", re.I)
USER_AGENT = f"beets/{beets.__version__}"
INSTRUMENTAL_LYRICS = "[Instrumental]"
@@ -552,8 +549,11 @@ class Genius(SearchBackend):
check = partial(self.check_match, artist, title)
for hit in json["response"]["hits"]:
result = hit["result"]
- if check(result["primary_artist"]["name"], result["title"]):
- return self.scrape_lyrics(self.fetch_text(hit["result"]["url"]))
+ url = hit["result"]["url"]
+ if check(result["primary_artist"]["name"], result["title"]) and (
+ lyrics := self.scrape_lyrics(self.fetch_text(url))
+ ):
+ return collapse_newlines(lyrics)
return None
@@ -670,7 +670,10 @@ def remove_credits(text):
return text
-def _scrape_strip_cruft(html, plain_text_out=False):
+collapse_newlines = partial(re.compile(r"\n{3,}").sub, r"\n\n")
+
+
+def _scrape_strip_cruft(html: str) -> str:
"""Clean up HTML"""
html = unescape(html)
@@ -682,13 +685,8 @@ def _scrape_strip_cruft(html, plain_text_out=False):
html = re.sub("
two
three"