Merge pull request #4333 from olgarrahan/genius_lyrics

Genius lyrics header bug fixed and updated test case for lyrics plugin
This commit is contained in:
Adrian Sampson 2022-04-18 10:17:35 -04:00 committed by GitHub
commit 614f83f542
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 878 additions and 5 deletions

View file

@ -387,6 +387,10 @@ class Genius(Backend):
except ValueError:
return None
def replace_br(self, lyrics_div):
    """Replace each ``br`` element found under ``lyrics_div`` with a newline.

    The element is modified through its own ``replace_with`` calls;
    nothing is returned.
    """
    line_breaks = lyrics_div.find_all("br")
    for tag in line_breaks:
        tag.replace_with("\n")
def _scrape_lyrics_from_html(self, html):
"""Scrape lyrics from a given genius.com html"""
@ -401,7 +405,12 @@ class Genius(Backend):
# all of the lyrics can be found already correctly formatted
# Sometimes, though, it packages the lyrics into separate divs, most
# likely for easier ad placement
lyrics_div = soup.find("div", class_="lyrics")
lyrics_div = soup.find("div", {"data-lyrics-container": True})
if lyrics_div:
self.replace_br(lyrics_div)
if not lyrics_div:
self._log.debug('Received unusual song page html')
verse_div = soup.find("div",
@ -417,8 +426,7 @@ class Genius(Backend):
return None
lyrics_div = verse_div.parent
for br in lyrics_div.find_all("br"):
br.replace_with("\n")
self.replace_br(lyrics_div)
ads = lyrics_div.find_all("div",
class_=re.compile("InreadAd__Container"))
@ -429,7 +437,6 @@ class Genius(Backend):
class_=re.compile("Lyrics__Footer"))
for footer in footers:
footer.replace_with("")
return lyrics_div.get_text()

View file

@ -66,6 +66,8 @@ Bug fixes:
* :doc:`plugins/embedart`: Fix a crash when using recent versions of
ImageMagick and the ``compare_threshold`` option.
:bug:`4272`
* :doc:`plugins/lyrics`: Fix an issue where the Genius header was included in
the lyrics, and add a test case based on up-to-date Genius HTML.
For packagers:

File diff suppressed because one or more lines are too long

View file

@ -457,7 +457,7 @@ class GeniusScrapeLyricsFromHtmlTest(GeniusBaseTest):
def test_good_lyrics(self):
"""Ensure we are able to scrape a page with lyrics"""
url = 'https://genius.com/Wu-tang-clan-cream-lyrics'
url = 'https://genius.com/Ttng-chinchilla-lyrics'
mock = MockFetchUrl()
self.assertIsNotNone(genius._scrape_lyrics_from_html(mock(url)))