diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 3d0e09673..2ec362356 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -59,9 +59,6 @@ except ImportError: JSONDict = dict[str, Any] -DIV_RE = re.compile(r"<(/?)div>?", re.I) -COMMENT_RE = re.compile(r"", re.S) -TAG_RE = re.compile(r"<[^>]*>") BREAK_RE = re.compile(r"\n?\s*]*)*>\s*\n?", re.I) USER_AGENT = f"beets/{beets.__version__}" INSTRUMENTAL_LYRICS = "[Instrumental]" @@ -552,8 +549,11 @@ class Genius(SearchBackend): check = partial(self.check_match, artist, title) for hit in json["response"]["hits"]: result = hit["result"] - if check(result["primary_artist"]["name"], result["title"]): - return self.scrape_lyrics(self.fetch_text(hit["result"]["url"])) + url = hit["result"]["url"] + if check(result["primary_artist"]["name"], result["title"]) and ( + lyrics := self.scrape_lyrics(self.fetch_text(url)) + ): + return collapse_newlines(lyrics) return None @@ -670,7 +670,10 @@ def remove_credits(text): return text -def _scrape_strip_cruft(html, plain_text_out=False): +collapse_newlines = partial(re.compile(r"\n{3,}").sub, r"\n\n") + + +def _scrape_strip_cruft(html: str) -> str: """Clean up HTML""" html = unescape(html) @@ -682,13 +685,8 @@ def _scrape_strip_cruft(html, plain_text_out=False): html = re.sub("