From 1b35a5df0db707150e4a790ec2e3dfc8f4f2357b Mon Sep 17 00:00:00 2001
From: Lucas Magno <lmagno94@gmail.com>
Date: Sun, 8 Oct 2017 09:13:51 -0300
Subject: [PATCH 1/2] Fetch lyrics from Genius through scraper

---
 beetsplug/lyrics.py | 113 +++++++++++++-------------------------------
 1 file changed, 33 insertions(+), 80 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index d7fca27c2..025a1374c 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -335,8 +335,11 @@ class MusiXmatch(SymbolsReplaced):
 
 
 class Genius(Backend):
-    """Fetch lyrics from Genius via genius-api."""
-
+    """Fetch lyrics from Genius via genius-api.
+       Simply adapted from https://bigishdata.com/2016/09/27/getting-song-lyrics-from-geniuss-api-scraping/"""
+    
+    base_url = "https://api.genius.com"
+    
     def __init__(self, config, log):
         super(Genius, self).__init__(config, log)
         self.api_key = config['genius_api_key'].as_str()
@@ -345,85 +348,35 @@ class Genius(Backend):
             'User-Agent': USER_AGENT,
         }
 
-    def search_genius(self, artist, title):
-        query = u"%s %s" % (artist, title)
-        url = u'https://api.genius.com/search?q=%s' \
-            % (urllib.parse.quote(query.encode('utf-8')))
-
-        self._log.debug(u'genius: requesting search {}', url)
-        try:
-            req = requests.get(
-                url,
-                headers=self.headers,
-                allow_redirects=True
-            )
-            req.raise_for_status()
-        except requests.RequestException as exc:
-            self._log.debug(u'genius: request error: {}', exc)
-            return None
-
-        try:
-            return req.json()
-        except ValueError:
-            self._log.debug(u'genius: invalid response: {}', req.text)
-            return None
-
-    def get_lyrics(self, link):
-        url = u'http://genius-api.com/api/lyricsInfo'
-
-        self._log.debug(u'genius: requesting lyrics for link {}', link)
-        try:
-            req = requests.post(
-                url,
-                data={'link': link},
-                headers=self.headers,
-                allow_redirects=True
-            )
-            req.raise_for_status()
-        except requests.RequestException as exc:
-            self._log.debug(u'genius: request error: {}', exc)
-            return None
-
-        try:
-            return req.json()
-        except ValueError:
-            self._log.debug(u'genius: invalid response: {}', req.text)
-            return None
-
-    def build_lyric_string(self, lyrics):
-        if 'lyrics' not in lyrics:
-            return
-        sections = lyrics['lyrics']['sections']
-
-        lyrics_list = []
-        for section in sections:
-            lyrics_list.append(section['name'])
-            lyrics_list.append('\n')
-            for verse in section['verses']:
-                if 'content' in verse:
-                    lyrics_list.append(verse['content'])
-
-        return ''.join(lyrics_list)
+    def lyrics_from_song_api_path(self, song_api_path):
+      song_url = self.base_url + song_api_path
+      response = requests.get(song_url, headers=self.headers)
+      json = response.json()
+      path = json["response"]["song"]["path"]
+      #gotta go regular html scraping... come on Genius
+      page_url = "https://genius.com" + path
+      page = requests.get(page_url)
+      html = BeautifulSoup(page.text, "html.parser")
+      #remove script tags that they put in the middle of the lyrics
+      [h.extract() for h in html('script')]
+      #at least Genius is nice and has a tag called 'lyrics'!
+      lyrics = html.find("div", class_="lyrics").get_text() #updated css where the lyrics are based in HTML
+      return lyrics
 
     def fetch(self, artist, title):
-        search_data = self.search_genius(artist, title)
-        if not search_data:
-            return
-
-        if not search_data['meta']['status'] == 200:
-            return
-        else:
-            records = search_data['response']['hits']
-            if not records:
-                return
-
-            record_url = records[0]['result']['url']
-            lyric_data = self.get_lyrics(record_url)
-            if not lyric_data:
-                return
-            lyrics = self.build_lyric_string(lyric_data)
-
-            return lyrics
+      search_url = self.base_url + "/search"
+      data = {'q': title}
+      response = requests.get(search_url, data=data, headers=self.headers)
+      json = response.json()
+      
+      song_info = None
+      for hit in json["response"]["hits"]:
+        if hit["result"]["primary_artist"]["name"] == artist:
+          song_info = hit
+          break
+      if song_info:
+        song_api_path = song_info["result"]["api_path"]
+        return self.lyrics_from_song_api_path(song_api_path)
 
 
 class LyricsWiki(SymbolsReplaced):
@@ -638,7 +591,7 @@ class Google(Backend):
 
 
 class LyricsPlugin(plugins.BeetsPlugin):
-    SOURCES = ['google', 'lyricwiki', 'musixmatch']
+    SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius']
     SOURCE_BACKENDS = {
         'google': Google,
         'lyricwiki': LyricsWiki,

From fc2d379fb529000f825017e635a0baf0e86a1242 Mon Sep 17 00:00:00 2001
From: Lucas Magno <lmagno94@gmail.com>
Date: Mon, 9 Oct 2017 06:22:42 -0300
Subject: [PATCH 2/2] Comply with PEP8

---
 beetsplug/lyrics.py | 67 +++++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 025a1374c..1987b67e7 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -336,10 +336,13 @@ class MusiXmatch(SymbolsReplaced):
 
 class Genius(Backend):
     """Fetch lyrics from Genius via genius-api.
-       Simply adapted from https://bigishdata.com/2016/09/27/getting-song-lyrics-from-geniuss-api-scraping/"""
-    
+
+    Searches the Genius API, then scrapes the lyrics page. Adapted from
+    bigishdata.com/2016/09/27/getting-song-lyrics-from-geniuss-api-scraping/
+    """
+
     base_url = "https://api.genius.com"
-    
+
     def __init__(self, config, log):
         super(Genius, self).__init__(config, log)
         self.api_key = config['genius_api_key'].as_str()
@@ -349,34 +352,40 @@ class Genius(Backend):
         }
 
     def lyrics_from_song_api_path(self, song_api_path):
-      song_url = self.base_url + song_api_path
-      response = requests.get(song_url, headers=self.headers)
-      json = response.json()
-      path = json["response"]["song"]["path"]
-      #gotta go regular html scraping... come on Genius
-      page_url = "https://genius.com" + path
-      page = requests.get(page_url)
-      html = BeautifulSoup(page.text, "html.parser")
-      #remove script tags that they put in the middle of the lyrics
-      [h.extract() for h in html('script')]
-      #at least Genius is nice and has a tag called 'lyrics'!
-      lyrics = html.find("div", class_="lyrics").get_text() #updated css where the lyrics are based in HTML
-      return lyrics
+        """Scrape the lyrics of the song at `song_api_path`, or None."""
+        song_url = self.base_url + song_api_path
+        response = requests.get(song_url, headers=self.headers)
+        path = response.json()["response"]["song"]["path"]
+
+        # The lyrics are read from the public song page, not the API reply.
+        page = requests.get("https://genius.com" + path)
+        html = BeautifulSoup(page.text, "html.parser")
+
+        # Remove script tags that they put in the middle of the lyrics.
+        for script in html('script'):
+            script.extract()
+
+        # Lyrics are in <div class="lyrics">. `find` returns None when the
+        # div is absent (e.g. page layout changed); treat that as "no
+        # lyrics" instead of raising AttributeError on `.get_text()`.
+        lyrics_div = html.find("div", class_="lyrics")
+        return lyrics_div.get_text() if lyrics_div is not None else None
 
     def fetch(self, artist, title):
-      search_url = self.base_url + "/search"
-      data = {'q': title}
-      response = requests.get(search_url, data=data, headers=self.headers)
-      json = response.json()
-      
-      song_info = None
-      for hit in json["response"]["hits"]:
-        if hit["result"]["primary_artist"]["name"] == artist:
-          song_info = hit
-          break
-      if song_info:
-        song_api_path = song_info["result"]["api_path"]
-        return self.lyrics_from_song_api_path(song_api_path)
+        """Return lyrics for the first search hit by `artist`, or None."""
+        try:
+            # A GET query string belongs in `params`; `data` is the body.
+            response = requests.get(self.base_url + "/search",
+                                    params={'q': title}, headers=self.headers)
+            hits = response.json()["response"]["hits"]
+        except (requests.RequestException, ValueError, KeyError) as exc:
+            self._log.debug(u'genius: search failed: {}', exc)
+            return None
+
+        for hit in hits:
+            result = hit["result"]
+            if result["primary_artist"]["name"] == artist:
+                return self.lyrics_from_song_api_path(result["api_path"])
 
 
 class LyricsWiki(SymbolsReplaced):