mirror of
https://github.com/beetbox/beets.git
synced 2026-01-30 12:02:41 +01:00
remove str decoding as input sources are unicode
This commit is contained in:
parent
321f862f23
commit
d31a7c6b28
1 changed file with 8 additions and 10 deletions
|
|
@@ -18,9 +18,10 @@ from __future__ import print_function
|
|||
|
||||
import re
|
||||
import logging
|
||||
import urllib
|
||||
import requests
|
||||
import json
|
||||
import unicodedata
|
||||
import urllib
|
||||
import difflib
|
||||
import itertools
|
||||
from HTMLParser import HTMLParseError
|
||||
|
|
@@ -60,11 +61,12 @@ def fetch_url(url):
|
|||
"""Retrieve the content at a given URL, or return None if the source
|
||||
is unreachable.
|
||||
"""
|
||||
try:
|
||||
return urllib.urlopen(url).read()
|
||||
except IOError as exc:
|
||||
log.debug(u'failed to fetch: {0} ({1})'.format(url, unicode(exc)))
|
||||
return None
|
||||
r = requests.get(url)
|
||||
if r.status_code == requests.codes.ok:
|
||||
return r.text
|
||||
else:
|
||||
log.debug(u'failed to fetch: {0} ({1})'.format(url, r.status_code))
|
||||
return None
|
||||
|
||||
|
||||
def unescape(text):
|
||||
|
|
@@ -367,10 +369,7 @@ def scrape_lyrics_from_html(html):
|
|||
parse_only=SoupStrainer(text=is_text_notcode))
|
||||
except HTMLParseError:
|
||||
return None
|
||||
|
||||
soup = sorted(soup.stripped_strings, key=len)[-1]
|
||||
if isinstance(soup, str):
|
||||
soup = soup.decode('utf8', 'ignore')
|
||||
return soup
|
||||
|
||||
|
||||
|
|
@@ -396,7 +395,6 @@ def fetch_google(artist, title):
|
|||
urlTitle = item.get('title', u'')
|
||||
if not is_page_candidate(urlLink, urlTitle, title, artist):
|
||||
continue
|
||||
|
||||
html = fetch_url(urlLink)
|
||||
lyrics = scrape_lyrics_from_html(html)
|
||||
if not lyrics:
|
||||
|
|
|
|||
Loading…
Reference in a new issue