mirror of
https://github.com/beetbox/beets.git
synced 2026-01-30 03:54:21 +01:00
Merge pull request #4333 from olgarrahan/genius_lyrics
Genius lyrics header bug fixed and updated test case for lyrics plugin
This commit is contained in:
commit
614f83f542
4 changed files with 878 additions and 5 deletions
|
|
@ -387,6 +387,10 @@ class Genius(Backend):
|
|||
except ValueError:
|
||||
return None
|
||||
|
||||
def replace_br(self, lyrics_div):
|
||||
for br in lyrics_div.find_all("br"):
|
||||
br.replace_with("\n")
|
||||
|
||||
def _scrape_lyrics_from_html(self, html):
|
||||
"""Scrape lyrics from a given genius.com html"""
|
||||
|
||||
|
|
@ -401,7 +405,12 @@ class Genius(Backend):
|
|||
# all of the lyrics can be found already correctly formatted
|
||||
# Sometimes, though, it packages the lyrics into separate divs, most
|
||||
# likely for easier ad placement
|
||||
lyrics_div = soup.find("div", class_="lyrics")
|
||||
|
||||
lyrics_div = soup.find("div", {"data-lyrics-container": True})
|
||||
|
||||
if lyrics_div:
|
||||
self.replace_br(lyrics_div)
|
||||
|
||||
if not lyrics_div:
|
||||
self._log.debug('Received unusual song page html')
|
||||
verse_div = soup.find("div",
|
||||
|
|
@ -417,8 +426,7 @@ class Genius(Backend):
|
|||
return None
|
||||
|
||||
lyrics_div = verse_div.parent
|
||||
for br in lyrics_div.find_all("br"):
|
||||
br.replace_with("\n")
|
||||
self.replace_br(lyrics_div)
|
||||
|
||||
ads = lyrics_div.find_all("div",
|
||||
class_=re.compile("InreadAd__Container"))
|
||||
|
|
@ -429,7 +437,6 @@ class Genius(Backend):
|
|||
class_=re.compile("Lyrics__Footer"))
|
||||
for footer in footers:
|
||||
footer.replace_with("")
|
||||
|
||||
return lyrics_div.get_text()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -66,6 +66,8 @@ Bug fixes:
|
|||
* :doc:`plugins/embedart`: Fix a crash when using recent versions of
|
||||
ImageMagick and the ``compare_threshold`` option.
|
||||
:bug:`4272`
|
||||
* :doc:`plugins/lyrics`: Fixed issue with Genius header being included in lyrics,
|
||||
and added a test case using up-to-date Genius HTML.
|
||||
|
||||
For packagers:
|
||||
|
||||
|
|
|
|||
864
test/rsrc/lyrics/geniuscom/Ttngchinchillalyrics.txt
Normal file
864
test/rsrc/lyrics/geniuscom/Ttngchinchillalyrics.txt
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -457,7 +457,7 @@ class GeniusScrapeLyricsFromHtmlTest(GeniusBaseTest):
|
|||
|
||||
def test_good_lyrics(self):
|
||||
"""Ensure we are able to scrape a page with lyrics"""
|
||||
url = 'https://genius.com/Wu-tang-clan-cream-lyrics'
|
||||
url = 'https://genius.com/Ttng-chinchilla-lyrics'
|
||||
mock = MockFetchUrl()
|
||||
self.assertIsNotNone(genius._scrape_lyrics_from_html(mock(url)))
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue