From 867d383544ba97391f81fbcc37481905dc3a7253 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 15 Jun 2021 10:30:05 +0200
Subject: [PATCH] lyrics: wrap BeautifulSoup() constructor to centralize error
 handling

also ensure that the return value is always checked for None
---
 beetsplug/lyrics.py | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index a6dc0bd6a..127a94a07 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -33,7 +33,8 @@ import six
 from six.moves import urllib
 
 try:
-    from bs4 import SoupStrainer, BeautifulSoup
+    import bs4
+    from bs4 import SoupStrainer
     HAS_BEAUTIFUL_SOUP = True
 except ImportError:
     HAS_BEAUTIFUL_SOUP = False
@@ -219,6 +220,17 @@ def slug(text):
     return re.sub(r'\W+', '-', unidecode(text).lower().strip()).strip('-')
 
 
+if HAS_BEAUTIFUL_SOUP:
+    def try_parse_html(html, **kwargs):
+        try:
+            return bs4.BeautifulSoup(html, 'html.parser', **kwargs)
+        except HTMLParseError:
+            return None
+else:
+    def try_parse_html(html, **kwargs):
+        return None
+
+
 class Backend(object):
     def __init__(self, config, log):
         self._log = log
@@ -377,7 +389,9 @@ class Genius(Backend):
 
     def _scrape_lyrics_from_html(self, html):
         """Scrape lyrics from a given genius.com html"""
-        soup = BeautifulSoup(html, "html.parser")
+        soup = try_parse_html(html)
+        if not soup:
+            return
 
         # Remove script tags that they put in the middle of the lyrics.
         [h.extract() for h in soup('script')]
@@ -440,9 +454,8 @@ class Tekstowo(Backend):
         html = _scrape_strip_cruft(html)
         html = _scrape_merge_paragraphs(html)
 
-        try:
-            soup = BeautifulSoup(html, "html.parser")
-        except HTMLParseError:
+        soup = try_parse_html(html)
+        if not soup:
             return None
 
         song_rows = soup.find("div", class_="content"). \
@@ -464,9 +477,8 @@ class Tekstowo(Backend):
         html = _scrape_strip_cruft(html)
         html = _scrape_merge_paragraphs(html)
 
-        try:
-            soup = BeautifulSoup(html, "html.parser")
-        except HTMLParseError:
+        soup = try_parse_html(html)
+        if not soup:
             return None
 
         return soup.find("div", class_="song-text").get_text()
@@ -527,10 +539,8 @@ def scrape_lyrics_from_html(html):
     html = _scrape_merge_paragraphs(html)
 
     # extract all long text blocks that are not code
-    try:
-        soup = BeautifulSoup(html, "html.parser",
-                             parse_only=SoupStrainer(text=is_text_notcode))
-    except HTMLParseError:
+    soup = try_parse_html(html, parse_only=SoupStrainer(text=is_text_notcode))
+    if not soup:
        return None
 
     # Get the longest text element (if any).
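
The hunks above replace every direct BeautifulSoup() call with a single
try_parse_html() wrapper and make each caller check its result for None.
For reference, a minimal standalone sketch of that pattern follows. It is
illustrative only: the scrape_title() caller is hypothetical, and the broad
except Exception stands in for the HTMLParseError shim that lyrics.py
defines elsewhere; neither is part of this patch.

    # Standalone sketch of the wrapper-plus-None-check pattern (illustrative,
    # not the plugin's actual code beyond the shape of try_parse_html).
    try:
        import bs4
        HAS_BEAUTIFUL_SOUP = True
    except ImportError:
        HAS_BEAUTIFUL_SOUP = False


    def try_parse_html(html, **kwargs):
        """Parse HTML with bs4; return None if bs4 is missing or parsing fails."""
        if not HAS_BEAUTIFUL_SOUP:
            return None
        try:
            return bs4.BeautifulSoup(html, 'html.parser', **kwargs)
        except Exception:  # lyrics.py catches its own HTMLParseError shim here
            return None


    def scrape_title(html):
        """Hypothetical caller: never touch the soup without a None check."""
        soup = try_parse_html(html)
        if not soup:
            return None
        tag = soup.find('title')
        return tag.get_text() if tag else None


    if __name__ == '__main__':
        # Prints "Example" when bs4 is installed, None otherwise.
        print(scrape_title('<html><head><title>Example</title></head></html>'))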