diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index f0290f74a..e899c0e86 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -33,7 +33,8 @@ import six
 from six.moves import urllib
 
 try:
-    from bs4 import SoupStrainer, BeautifulSoup
+    import bs4
+    from bs4 import SoupStrainer
     HAS_BEAUTIFUL_SOUP = True
 except ImportError:
     HAS_BEAUTIFUL_SOUP = False
@@ -219,7 +220,20 @@ def slug(text):
     return re.sub(r'\W+', '-', unidecode(text).lower().strip()).strip('-')
 
 
+if HAS_BEAUTIFUL_SOUP:
+    def try_parse_html(html, **kwargs):
+        try:
+            return bs4.BeautifulSoup(html, 'html.parser', **kwargs)
+        except HTMLParseError:
+            return None  # unparsable markup is reported as "no soup"
+else:
+    def try_parse_html(html, **kwargs):
+        return None  # bs4 is not importable, so nothing can be parsed
+
+
 class Backend(object):
+    REQUIRES_BS = False  # subclasses that need beautifulsoup4 set this True
+
     def __init__(self, config, log):
         self._log = log
 
@@ -257,6 +271,7 @@ class Backend(object):
             return r.text
         else:
             self._log.debug(u'failed to fetch: {0} ({1})', url, r.status_code)
+            return None  # explicit: callers test the result with `not html`
 
     def fetch(self, artist, title):
         raise NotImplementedError()
@@ -286,11 +301,11 @@ class MusiXmatch(Backend):
 
         html = self.fetch_url(url)
         if not html:
-            return
+            return None
         if "We detected that your IP is blocked" in html:
             self._log.warning(u'we are blocked at MusixMatch: url %s failed'
                               % url)
-            return
+            return None
         html_parts = html.split('

20 and
@@ -522,10 +538,8 @@ def scrape_lyrics_from_html(html):
     html = _scrape_merge_paragraphs(html)
 
     # extract all long text blocks that are not code
-    try:
-        soup = BeautifulSoup(html, "html.parser",
-                             parse_only=SoupStrainer(text=is_text_notcode))
-    except HTMLParseError:
+    soup = try_parse_html(html, parse_only=SoupStrainer(text=is_text_notcode))
+    if not soup:
         return None
 
     # Get the longest text element (if any).
@@ -539,6 +553,8 @@ def scrape_lyrics_from_html(html):
 class Google(Backend):
     """Fetch lyrics from Google search results."""
 
+    REQUIRES_BS = True
+
     def __init__(self, config, log):
         super(Google, self).__init__(config, log)
         self.api_key = config['google_API_key'].as_str()
@@ -645,6 +661,8 @@ class Google(Backend):
                                               title, artist):
                     continue
                 html = self.fetch_url(url_link)
+                if not html:
+                    continue
                 lyrics = scrape_lyrics_from_html(html)
                 if not lyrics:
                     continue
@@ -654,10 +672,11 @@ class Google(Backend):
                                     item['displayLink'])
                     return lyrics
 
+        return None
+
 
 class LyricsPlugin(plugins.BeetsPlugin):
     SOURCES = ['google', 'musixmatch', 'genius', 'tekstowo']
-    BS_SOURCES = ['google', 'genius', 'tekstowo']
     SOURCE_BACKENDS = {
         'google': Google,
         'musixmatch': MusiXmatch,
@@ -727,15 +746,23 @@ class LyricsPlugin(plugins.BeetsPlugin):
                          for source in sources]
 
     def sanitize_bs_sources(self, sources):
-        for source in self.BS_SOURCES:
-            if source in sources:
+        """Return the entries of ``sources`` usable without Beautiful Soup.
+
+        ``sources`` holds source names (keys of ``SOURCE_BACKENDS``). Names
+        whose backend class sets ``REQUIRES_BS`` are left out, with a debug
+        message logged for each; the input list is not modified.
+        """
+        enabled_sources = []
+        for source in sources:
+            if self.SOURCE_BACKENDS[source].REQUIRES_BS:
                 self._log.debug(u'To use the %s lyrics source, you must '
                                 u'install the beautifulsoup4 module. See '
                                 u'the documentation for further details.'
                                 % source)
-                sources.remove(source)
+            else:
+                enabled_sources.append(source)
 
-        return sources
+        return enabled_sources
 
     def get_bing_access_token(self):
         params = {
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 69e2f01a7..5ca9a8f9c 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -380,7 +380,9 @@ Fixes:
 * Templates that use ``%ifdef`` now produce the expected behavior when used in
   conjunction with non-string fields from the :doc:`/plugins/types`.
   :bug:`3852`
-
+* :doc:`/plugins/lyrics`: Fix crashes when a website could not be retrieved,
+  affecting at least the Genius source.
+  :bug:`3970`
 
 For plugin developers:
 