From 76aa97827dbab08d636d3b027da641bbaa1cbb5e Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 15 Jun 2021 10:10:22 +0200
Subject: [PATCH 1/7] lyrics: rename html -> soup for consistency

---
 beetsplug/lyrics.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index f0290f74a..83b43ff22 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -373,22 +373,22 @@ class Genius(Backend):
     def _scrape_lyrics_from_html(self, html):
         """Scrape lyrics from a given genius.com html"""
 
-        html = BeautifulSoup(html, "html.parser")
+        soup = BeautifulSoup(html, "html.parser")
 
         # Remove script tags that they put in the middle of the lyrics.
-        [h.extract() for h in html('script')]
+        [h.extract() for h in soup('script')]
 
         # Most of the time, the page contains a div with class="lyrics" where
         # all of the lyrics can be found already correctly formatted
         # Sometimes, though, it packages the lyrics into separate divs, most
         # likely for easier ad placement
-        lyrics_div = html.find("div", class_="lyrics")
+        lyrics_div = soup.find("div", class_="lyrics")
         if not lyrics_div:
             self._log.debug(u'Received unusual song page html')
-            verse_div = html.find("div",
+            verse_div = soup.find("div",
                                   class_=re.compile("Lyrics__Container"))
             if not verse_div:
-                if html.find("div",
+                if soup.find("div",
                              class_=re.compile("LyricsPlaceholder__Message"),
                              string="This song is an instrumental"):
                     self._log.debug('Detected instrumental')
@@ -433,11 +433,11 @@ class Tekstowo(Backend):
         html = _scrape_merge_paragraphs(html)
 
         try:
-            html = BeautifulSoup(html, "html.parser")
+            soup = BeautifulSoup(html, "html.parser")
         except HTMLParseError:
             return None
 
-        song_rows = html.find("div", class_="content"). \
+        song_rows = soup.find("div", class_="content"). \
             find("div", class_="card"). \
             find_all("div", class_="box-przeboje")
 
@@ -457,11 +457,11 @@ class Tekstowo(Backend):
         html = _scrape_merge_paragraphs(html)
 
         try:
-            html = BeautifulSoup(html, "html.parser")
+            soup = BeautifulSoup(html, "html.parser")
         except HTMLParseError:
             return None
 
-        return html.find("div", class_="song-text").get_text()
+        return soup.find("div", class_="song-text").get_text()
 
 
 def remove_credits(text):

From f8a4c661faa0e6b5201a0fd88c74cb7f574f52b2 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 15 Jun 2021 10:18:46 +0200
Subject: [PATCH 2/7] lyrics: return None explicitly

---
 beetsplug/lyrics.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 83b43ff22..9607a5cb7 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -257,6 +257,7 @@ class Backend(object):
             return r.text
         else:
             self._log.debug(u'failed to fetch: {0} ({1})', url, r.status_code)
+            return None
 
     def fetch(self, artist, title):
         raise NotImplementedError()
@@ -286,11 +287,11 @@ class MusiXmatch(Backend):
 
         html = self.fetch_url(url)
         if not html:
-            return
+            return None
         if "We detected that your IP is blocked" in html:
             self._log.warning(u'we are blocked at MusixMatch: url %s failed'
                               % url)
-            return
+            return None
         html_parts = html.split('<p class="mxm-lyrics__content')
         # Sometimes lyrics come in 2 or more parts
         lyrics_parts = []
@@ -302,10 +303,10 @@ class MusiXmatch(Backend):
         # missing songs. this seems to happen after being blocked
         # above, when filling in the CAPTCHA.
         if "Instant lyrics for all your music." in lyrics:
-            return
+            return None
         # sometimes there are non-existent lyrics with some content
         if 'Lyrics | Musixmatch' in lyrics:
-            return
+            return None
         return lyrics
 
 
@@ -348,6 +349,7 @@ class Genius(Backend):
 
         self._log.debug(u'Genius failed to find a matching artist for \'{0}\'',
                         artist)
+        return None
 
     def _search(self, artist, title):
         """Searches the genius api for a given artist and title
@@ -654,6 +656,8 @@ class Google(Backend):
                                     item['displayLink'])
                     return lyrics
 
+        return None
+
 
 class LyricsPlugin(plugins.BeetsPlugin):
     SOURCES = ['google', 'musixmatch', 'genius', 'tekstowo']

From b34442f5d11bb118856e2c6ad9f6ae9b5ee96e36 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 15 Jun 2021 10:24:03 +0200
Subject: [PATCH 3/7] lyrics: always check for fetch_url() returning None

---
 beetsplug/lyrics.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 9607a5cb7..a6dc0bd6a 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -344,8 +344,10 @@ class Genius(Backend):
             hit_artist = hit["result"]["primary_artist"]["name"]
 
             if slug(hit_artist) == slug(artist):
-                return self._scrape_lyrics_from_html(
-                    self.fetch_url(hit["result"]["url"]))
+                html = self.fetch_url(hit["result"]["url"])
+                if not html:
+                    return None
+                return self._scrape_lyrics_from_html(html)
 
         self._log.debug(u'Genius failed to find a matching artist for \'{0}\'',
                         artist)
@@ -419,12 +421,16 @@ class Tekstowo(Backend):
     def fetch(self, artist, title):
         url = self.build_url(title, artist)
         search_results = self.fetch_url(url)
+        if not search_results:
+            return None
         song_page_url = self.parse_search_results(search_results)
 
         if not song_page_url:
             return None
 
         song_page_html = self.fetch_url(song_page_url)
+        if not song_page_html:
+            return None
         return self.extract_lyrics(song_page_html)
 
     def parse_search_results(self, html):
@@ -512,9 +518,6 @@ def scrape_lyrics_from_html(html):
     if not HAS_BEAUTIFUL_SOUP:
         return None
 
-    if not html:
-        return None
-
     def is_text_notcode(text):
         length = len(text)
         return (length > 20 and
@@ -647,6 +650,8 @@ class Google(Backend):
                                               title, artist):
                     continue
                 html = self.fetch_url(url_link)
+                if not html:
+                    continue
                 lyrics = scrape_lyrics_from_html(html)
                 if not lyrics:
                     continue

From 867d383544ba97391f81fbcc37481905dc3a7253 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 15 Jun 2021 10:30:05 +0200
Subject: [PATCH 4/7] lyrics: wrap BeautifulSoup() constructor to centralize
 error handling

also ensure that the return value is always checked for None
---
 beetsplug/lyrics.py | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index a6dc0bd6a..127a94a07 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -33,7 +33,8 @@ import six
 from six.moves import urllib
 
 try:
-    from bs4 import SoupStrainer, BeautifulSoup
+    import bs4
+    from bs4 import SoupStrainer
     HAS_BEAUTIFUL_SOUP = True
 except ImportError:
     HAS_BEAUTIFUL_SOUP = False
@@ -219,6 +220,17 @@ def slug(text):
     return re.sub(r'\W+', '-', unidecode(text).lower().strip()).strip('-')
 
 
+if HAS_BEAUTIFUL_SOUP:
+    def try_parse_html(html, **kwargs):
+        try:
+            return bs4.BeautifulSoup(html, 'html.parser', **kwargs)
+        except HTMLParseError:
+            return None
+else:
+    def try_parse_html(html, **kwargs):
+        return None
+
+
 class Backend(object):
     def __init__(self, config, log):
         self._log = log
@@ -377,7 +389,9 @@ class Genius(Backend):
     def _scrape_lyrics_from_html(self, html):
         """Scrape lyrics from a given genius.com html"""
 
-        soup = BeautifulSoup(html, "html.parser")
+        soup = try_parse_html(html)
+        if not soup:
+            return
 
         # Remove script tags that they put in the middle of the lyrics.
         [h.extract() for h in soup('script')]
@@ -440,9 +454,8 @@ class Tekstowo(Backend):
         html = _scrape_strip_cruft(html)
         html = _scrape_merge_paragraphs(html)
 
-        try:
-            soup = BeautifulSoup(html, "html.parser")
-        except HTMLParseError:
+        soup = try_parse_html(html)
+        if not soup:
             return None
 
         song_rows = soup.find("div", class_="content"). \
@@ -464,9 +477,8 @@ class Tekstowo(Backend):
         html = _scrape_strip_cruft(html)
         html = _scrape_merge_paragraphs(html)
 
-        try:
-            soup = BeautifulSoup(html, "html.parser")
-        except HTMLParseError:
+        soup = try_parse_html(html)
+        if not soup:
             return None
 
         return soup.find("div", class_="song-text").get_text()
@@ -527,10 +539,8 @@ def scrape_lyrics_from_html(html):
     html = _scrape_merge_paragraphs(html)
 
     # extract all long text blocks that are not code
-    try:
-        soup = BeautifulSoup(html, "html.parser",
-                             parse_only=SoupStrainer(text=is_text_notcode))
-    except HTMLParseError:
+    soup = try_parse_html(html, parse_only=SoupStrainer(text=is_text_notcode))
+    if not soup:
         return None
 
     # Get the longest text element (if any).

From 038cebfa9bed3c8463e84a83ced0a4ba408794c8 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 15 Jun 2021 10:34:59 +0200
Subject: [PATCH 5/7] lyrics: remove duplicate check for beautifulsoup

The plugin already disables these sources entirely when beautifulsoup is
not available
---
 beetsplug/lyrics.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 127a94a07..3a746bdd2 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -448,9 +448,6 @@ class Tekstowo(Backend):
         return self.extract_lyrics(song_page_html)
 
     def parse_search_results(self, html):
-        if not HAS_BEAUTIFUL_SOUP:
-            return None
-
         html = _scrape_strip_cruft(html)
         html = _scrape_merge_paragraphs(html)
 
@@ -527,9 +524,6 @@ def scrape_lyrics_from_html(html):
     """Scrape lyrics from a URL. If no lyrics can be found, return None
     instead.
     """
-    if not HAS_BEAUTIFUL_SOUP:
-        return None
-
     def is_text_notcode(text):
         length = len(text)
         return (length > 20 and

From 316b79f72fd5c4f39e4d14b417c7bb5fd8864e4a Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 15 Jun 2021 10:35:58 +0200
Subject: [PATCH 6/7] lyrics: slightly refactor beautifulsoup checks

instead of having a global list of sources that require the package,
indicate the dependency using an attribute of the source class
---
 beetsplug/lyrics.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 3a746bdd2..e899c0e86 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -232,6 +232,8 @@ else:
 
 
 class Backend(object):
+    REQUIRES_BS = False
+
     def __init__(self, config, log):
         self._log = log
 
@@ -329,6 +331,8 @@ class Genius(Backend):
     bigishdata.com/2016/09/27/getting-song-lyrics-from-geniuss-api-scraping/
     """
 
+    REQUIRES_BS = True
+
     base_url = "https://api.genius.com"
 
     def __init__(self, config, log):
@@ -428,6 +432,7 @@ class Genius(Backend):
 
 class Tekstowo(Backend):
     # Fetch lyrics from Tekstowo.pl.
+    REQUIRES_BS = True
 
     BASE_URL = 'http://www.tekstowo.pl'
     URL_PATTERN = BASE_URL + '/wyszukaj.html?search-title=%s&search-artist=%s'
@@ -548,6 +553,8 @@ def scrape_lyrics_from_html(html):
 class Google(Backend):
     """Fetch lyrics from Google search results."""
 
+    REQUIRES_BS = True
+
     def __init__(self, config, log):
         super(Google, self).__init__(config, log)
         self.api_key = config['google_API_key'].as_str()
@@ -670,7 +677,6 @@ class Google(Backend):
 
 class LyricsPlugin(plugins.BeetsPlugin):
     SOURCES = ['google', 'musixmatch', 'genius', 'tekstowo']
-    BS_SOURCES = ['google', 'genius', 'tekstowo']
     SOURCE_BACKENDS = {
         'google': Google,
         'musixmatch': MusiXmatch,
@@ -740,15 +746,17 @@ class LyricsPlugin(plugins.BeetsPlugin):
                          for source in sources]
 
     def sanitize_bs_sources(self, sources):
-        for source in self.BS_SOURCES:
-            if source in sources:
+        enabled_sources = []
+        for source in sources:
+            if source.REQUIRES_BS:
                 self._log.debug(u'To use the %s lyrics source, you must '
                                 u'install the beautifulsoup4 module. See '
                                 u'the documentation for further details.'
                                 % source)
-                sources.remove(source)
+            else:
+                enabled_sources.append(source)
 
-        return sources
+        return enabled_sources
 
     def get_bing_access_token(self):
         params = {

From dfd834cf8f5fcd72266e87c1fdcf5d638dfb8fe4 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 15 Jun 2021 11:01:56 +0200
Subject: [PATCH 7/7] lyrics: update changelog

---
 docs/changelog.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 69e2f01a7..5ca9a8f9c 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -380,7 +380,9 @@ Fixes:
 * Templates that use ``%ifdef`` now produce the expected behavior when used in
   conjunction with non-string fields from the :doc:`/plugins/types`.
   :bug:`3852`
-
+* :doc:`/plugins/lyrics`: Fix crashes when a website could not be retrieved,
+  affecting at least the Genius source
+  :bug:`3970`
 
 For plugin developers: