diff --git a/.gitignore b/.gitignore index 64f08abe5..241202b88 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,6 @@ # Project Specific patterns man -test/rsrc/lyrics/* # The rest is from https://www.gitignore.io/api/python diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 8e45d25b4..81040af3a 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -359,16 +359,54 @@ class Genius(Backend): 'User-Agent': USER_AGENT, } - def lyrics_from_song_page(self, page_url): - # Gotta go regular html scraping... come on Genius. - self._log.debug(u'fetching lyrics from: {0}', page_url) - try: - page = requests.get(page_url) - except requests.RequestException as exc: - self._log.debug(u'Genius page request for {0} failed: {1}', - page_url, exc) + def fetch(self, artist, title): + """Fetch lyrics from genius.com + + Because genius doesn't allow accesssing lyrics via the api, + we first query the api for a url matching our artist & title, + then attempt to scrape that url for the lyrics. 
+ """ + json = self._search(artist, title) + if not json: + self._log.debug(u'Genius API request returned invalid JSON') return None - html = BeautifulSoup(page.text, "html.parser") + + # find a matching artist in the json + for hit in json["response"]["hits"]: + hit_artist = hit["result"]["primary_artist"]["name"] + + if slug(hit_artist) == slug(artist): + return self._scrape_lyrics_from_html( + self.fetch_url(hit["result"]["url"])) + + self._log.debug(u'Genius failed to find a matching artist for \'{0}\'', + artist) + + def _search(self, artist, title): + """Searches the genius api for a given artist and title + + https://docs.genius.com/#search-h2 + + :returns: json response + """ + search_url = self.base_url + "/search" + data = {'q': title + " " + artist.lower()} + try: + response = requests.get( + search_url, data=data, headers=self.headers) + except requests.RequestException as exc: + self._log.debug(u'Genius API request failed: {0}', exc) + return None + + try: + return response.json() + except ValueError: + return None + + def _scrape_lyrics_from_html(self, html): + """Scrape lyrics from a given genius.com html""" + + html = BeautifulSoup(html, "html.parser") # Remove script tags that they put in the middle of the lyrics. 
[h.extract() for h in html('script')] @@ -402,31 +440,6 @@ class Genius(Backend): return lyrics_div.get_text() - def fetch(self, artist, title): - search_url = self.base_url + "/search" - data = {'q': title + " " + artist.lower()} - try: - response = requests.get(search_url, data=data, - headers=self.headers) - except requests.RequestException as exc: - self._log.debug(u'Genius API request failed: {0}', exc) - return None - - try: - json = response.json() - except ValueError: - self._log.debug(u'Genius API request returned invalid JSON') - return None - - for hit in json["response"]["hits"]: - hit_artist = hit["result"]["primary_artist"]["name"] - - if slug(hit_artist) == slug(artist): - return self.lyrics_from_song_page(hit["result"]["url"]) - - self._log.debug(u'Genius failed to find a matching artist for \'{0}\'', - artist) - class LyricsWiki(SymbolsReplaced): """Fetch lyrics from LyricsWiki.""" diff --git a/test/rsrc/lyrics/geniuscom/Wutangclancreamlyrics.txt b/test/rsrc/lyrics/geniuscom/Wutangclancreamlyrics.txt new file mode 100644 index 000000000..08518f8ee --- /dev/null +++ b/test/rsrc/lyrics/geniuscom/Wutangclancreamlyrics.txt @@ -0,0 +1,2227 @@ + + + + + + + +Wu-Tang Clan – C.R.E.A.M. Lyrics | Genius Lyrics + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + {{:: 'cloud_flare_always_on_short_message' | i18n }} +
Check @genius for updates. We'll have things fixed soon. +
+
+
+ + +
+ GENIUS +
+ + + +
+ + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ + + + + + + + + + + + + + + + +
+ +
+ +
+
+
+
+
+ Https%3a%2f%2fimages +
+
+ +
+
+ +

C.R.E.A.M.

+

+ Wu-Tang Clan +

+ +

+ + +

+

+ + + + +

+

+ + + + +

+ +
+
+
+
+
+
+
+ + +
+ + +
+
+ + + + +
+ +

+ About “C.R.E.A.M.” +

+ + +
+
+

Arguably one of the most iconic songs in hip-hop, the underlying idea of “C.R.E.A.M.” is found in its title—cash rules everything. The timeless piano riffs and background vocals come from a chopped up sample of The Charmels‘ 1967 record, “As Long As I’ve Got You,” that make up the entire track.

+ +

Although it was released as an official single in 1994, “C.R.E.A.M.” was first recorded in 1991, around the same time as RZA’s assault case, and featured himself and Ghostface Killah. The track went through several revisions and was later re-recorded by Raekwon and Inspectah Deck in 1993—an early title of the song was “Lifestyles of the Mega-Rich.”

+ +

In 2017, RZA explained to Power 106 how the final version of the track came together:

+ +

Once we got to the studio, I decided that this track had to be on the Wu-Tang album. I reminded Rae and Deck of their verses—their verses were long. […] Method Man, the master of hooks at the time, came in with this hook right here: ‘cash rules everything around me, cream, get the money.’ Once he added that element, I knew it was going to be a smash.

+ +

Since its release, the song and chorus have been referenced countless times by several artists. It has also been featured in movies such as Eminem’s 8 Mile and the N.W.A biopic, Straight Outta Compton.

+
+ +
+ +
+
+
    + +
  • +
    +

    What has RZA said about the song?

    +
    + +
    +

    + +
    +
    + + +
  • + + +
  • +
    +

    What has Raekwon said about the song?

    +
    + +
    +
    +

    ‘C.R.E.A.M.’ did a lot for my career personally. It gave me an opportunity to revisit the times where that cream meant that much to us. So, yeah, when I think of this record it just automatically puts me back into ‘87/’88 where we were standing in front of the building. It’s cold outside. We didn’t care. We’re out there, all black on trying to make dollars. Just trying to make some money and trying to eat. Survive.

    + +

    This song, I remember writing to the beat a long time ago before we actually came out. That beat is old. That was probably like a ‘89 beat. RZA had it that long because he had a bunch of breaks. He had all kind of things and he was making beats back then, but we was just picking and that beat happened to always sit around and I would be like, ‘I want that beat, so don’t give that beat to nobody.’ And he kept his word and let me have it.

    + +

    Meth came up with the hook but our dude named Raider Ruckus, this was like Meth’s homeboy back then, like they was real close, he came up with the phrase ‘cash rules everything around me.’ So when he showed Meth what it was and was like, ‘Cash rules everything around me,’ Meth was like, ‘Word, you right!’ And turned it into a movie, and I came in later that day and heard it and co-signed it.

    +
    + +

    via Complex

    +
    + + +
  • + + +
  • +
    +

    What has U-God said about the song?

    +
    + +
    +

    “C.R.E.A.M.” is a true song. Everything Inspectah Deck and Raekwon said is 100 percent true. Not one line in that entire song is a lie, or even a slight exaggeration. Deck did sell base, and he did go to jail at the age of fifteen. Rae was sticking up white boys on ball courts, rocking the same damn ’Lo sweater. And of course, Meth on the hook was like butter on the popcorn. Meth knew the hard times, too, being out there smoking woolies and pumping crack, etc. That raspy shit he was kicking just echoed in everyone’s head long after the song was done playing.

    The realism on “C.R.E.A.M.” is what resonates with so many people all over the world. People everywhere know that sentiment of being slaves to the dollar. Cash is king, and we are its lowly subjects. That’s pretty much the case in every nation around the world, the desperation to put your life and your freedom on the line to make a couple dollars. Whether you’re working, stripping, hustling, or slinging, whether you’re a business owner or homeless, cash rules everything around us.

    + +

    Source: Raw:My Journey into Wu-Tang

    +
    + + +
  • + + +
  • +
    +

    What songs were sampled on the beat for “C.R.E.A.M.?”

    +
    + +
    +

    The vocals and background sample that can be heard on the song’s intro were taken from The Charmels’ 1967 song “As Long as I’ve Got You”:

    + +

    + +
    + +

    The classic keys sample that can be heard throughout the beat was also taken from the previously mentioned song:

    + +

    + +
    +
    + + +
  • + + +
  • +
    +

    What has Method Man said about the song?

    +
    + +
    +

    Meth told Complex,

    + +
    +

    ‘C.R.E.A.M.’ was the one that really put us on the map if you wanna be technical. I wasn’t there when they recorded ‘C.R.E.A.M.’ I came in after the fact. RZA was like, ‘Put a hook on this song’ and I put a hook on it. That’s how it always went. I liked doing hooks.

    + +

    The hook for that was done by my man Raider Ruckus. We used to work at the Statue of Liberty and when we were coming home we used to come up with all these made-up words that were acronyms.

    + +

    We had words like ‘BIBWAM’ which meant, ‘Bitches Is Busted Without A Man’ and all this other crazy shit. Raider Ruckus was so ill with the way he put the words together. We would call money ‘cream’ so he took each letter and made a word out of it and killed it the way he did it.

    + +

    Something like that had never been done before as far as a hook or even a way of speaking. This is just showing and proving that we paid attention in class when we was kids. You can’t do shit like that unless you got a brain in your fucking head! You got to have some level of intelligence to do something like that.

    + +

    The best acronym for a word that I heard was ‘P.R.O.J.E.C.T.S.’ by Killah Priest. He said ‘People Relying On Just Enough Cash To Survive.’ And he’s the one that came up with ‘Basic Instructions Before Leaving Earth,’ the acronym for B.I.B.L.E. This ain’t no fluke shit man.

    + +

    There’s a reason you got millions upon millions of fucking kids running around with Wu-Tang tattoos. You don’t just put something on your body permanently unless it’s official. At that time, when you’re coming out brand new and representing where you come from, everybody from that area wants you to win because they win. That’s what it was like for us.

    + +

    We were the only dudes from Staten Island doing it so everybody from Staten Island wanted us to win. Not just dudes from Staten Island, but dudes from Brooklyn too because they had peoples in the group too. Then it was just grimy niggas who loved to see real shit, saying, ‘We riding with them Wu-Tang niggas. Fuck all that shiny suit shit!’ That ain’t no take on Puff, a lot of niggas was wearing suits and shit man, but that ain’t us.

    +
    +
    + + +
  • + + + +
+
+ + +
+
+

"C.R.E.A.M." Track Info

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+ + +
+ +
+
+ + 1.   + + + + Bring Da Ruckus + + + + +
+
+ +
+
+ + 2.   + + + + Shame On A Nigga + + + + +
+
+ +
+
+ + 3.   + + + + Clan in Da Front + + + + +
+
+ +
+
+ + 4.   + + + + Wu-Tang: 7th Chamber + + + + +
+
+ + + +
+ +
+ + + +
+
+ + 8.   + + + + C.R.E.A.M. + + + + +
+
+ +
+
+ + 9.   + + + + Method Man + + + + +
+
+ +
+
+ + 10.   + + + + Protect Ya Neck + + + + +
+
+ +
+
+ + 11.   + + + + Tearz + + + + +
+
+ +
+ +
+ +
+
+ + 13.   + + + + Conclusion + + + + +
+
+ +
+ +
+
+ +
+
+
+ + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 22d5f2f0c..1355ec779 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -18,23 +18,21 @@ from __future__ import absolute_import, division, print_function import itertools +from io import open import os import re import six import sys import unittest -from mock import patch -from test import _common +import confuse +from mock import MagicMock, patch from beets import logging from beets.library import Item from beets.util import bytestring_path -import confuse - from beetsplug import lyrics - -from mock import MagicMock +from test import _common log = logging.getLogger('beets.test_lyrics') @@ -232,38 +230,11 @@ class MockFetchUrl(object): def __call__(self, url, filename=None): self.fetched = url fn = url_to_filename(url) - with open(fn, 'r') as f: + with open(fn, 'r', encoding="utf8") as f: content = f.read() return content -class GeniusMockGet(object): - - def __init__(self, pathval='fetched_path'): - self.pathval = pathval - self.fetched = None - - def __call__(self, url, headers=False): - from requests.models import Response - # for the first requests.get() return a path - if headers: - response = Response() - response.status_code = 200 - response._content = b'{"meta":{"status":200},\ - "response":{"song":{"path":"/lyrics/sample"}}}' - return response - # for the second requests.get() return the genius page - else: - from mock import PropertyMock - self.fetched = url - fn = url_to_filename(url) - with open(fn, 'r') as f: - content = f.read() - response = Response() - type(response).text = PropertyMock(return_value=content) - return response - - def is_lyrics_content_ok(title, text): """Compare lyrics text to expected lyrics for given title.""" if not text: @@ -445,8 +416,9 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest): google.is_page_candidate(url, url_title, s['title'], u'Sunn O)))') 
-class LyricsGeniusBaseTest(unittest.TestCase): +# test Genius backend +class GeniusBaseTest(unittest.TestCase): def setUp(self): """Set up configuration.""" try: @@ -457,28 +429,91 @@ class LyricsGeniusBaseTest(unittest.TestCase): self.skipTest("Python's built-in HTML parser is not good enough") -class LyricsGeniusScrapeTest(LyricsGeniusBaseTest): - - """Checks that Genius backend works as intended. - """ - import requests +class GeniusScrapeLyricsFromHtmlTest(GeniusBaseTest): + """tests Genius._scrape_lyrics_from_html()""" def setUp(self): """Set up configuration""" - LyricsGeniusBaseTest.setUp(self) + GeniusBaseTest.setUp(self) self.plugin = lyrics.LyricsPlugin() - @patch.object(requests, 'get', GeniusMockGet()) def test_no_lyrics_div(self): - """Ensure that `lyrics_from_song_page` doesn't crash when the html - for a Genius page doesn't contain
+ """Ensure we don't crash when scraping the html for a genius page + that doesn't contain
""" # https://github.com/beetbox/beets/issues/3535 # expected return value None - song_url = 'https://genius.com/sample' - self.assertEqual(genius.lyrics_from_song_page(song_url), - None) + url = 'https://genius.com/sample' + mock = MockFetchUrl() + self.assertEqual(genius._scrape_lyrics_from_html(mock(url)), None) + def test_good_lyrics(self): + """Ensure we are able to scrape a page with lyrics""" + url = 'https://genius.com/Wu-tang-clan-cream-lyrics' + mock = MockFetchUrl() + self.assertIsNotNone(genius._scrape_lyrics_from_html(mock(url))) + + # TODO: find an example of a lyrics page with multiple divs and test it + + +class GeniusFetchTest(GeniusBaseTest): + """tests Genius.fetch()""" + + def setUp(self): + """Set up configuration""" + GeniusBaseTest.setUp(self) + self.plugin = lyrics.LyricsPlugin() + + @patch.object(lyrics.Genius, '_scrape_lyrics_from_html') + @patch.object(lyrics.Backend, 'fetch_url', return_value=True) + def test_json(self, mock_fetch_url, mock_scrape): + """Ensure we're finding artist matches""" + with patch.object( + lyrics.Genius, '_search', return_value={ + "response": { + "hits": [ + { + "result": { + "primary_artist": { + "name": u"\u200Bblackbear", + }, + "url": "blackbear_url" + } + }, + { + "result": { + "primary_artist": { + "name": u"El\u002Dp" + }, + "url": "El-p_url" + } + } + ] + } + } + ) as mock_json: + # genius uses zero-width-spaces (\u200B) for lowercase + # artists so we make sure we can match those + self.assertIsNotNone(genius.fetch('blackbear', 'Idfc')) + mock_fetch_url.assert_called_once_with("blackbear_url") + mock_scrape.assert_called_once_with(True) + + # genius uses the hypen minus (\u002D) as their dash + self.assertIsNotNone(genius.fetch('El-p', 'Idfc')) + mock_fetch_url.assert_called_with('El-p_url') + mock_scrape.assert_called_with(True) + + # test no matching artist + self.assertIsNone(genius.fetch('doesntexist', 'none')) + + # test invalid json + mock_json.return_value = None + 
self.assertIsNone(genius.fetch('blackbear', 'Idfc')) + + # TODO: add integration test hitting real api + + +# test utilities class SlugTests(unittest.TestCase):