lyrics: replace apostrophes with ' (GC-498)

This commit is contained in:
Adrian Sampson 2013-01-11 10:51:22 -08:00
parent 7a410f636b
commit d6c7cfa4e3
2 changed files with 14 additions and 7 deletions

View file

@ -67,7 +67,6 @@ def extract_text(html, starttag):
_, html = html.split(starttag, 1)
except ValueError:
return
print(html)
# Walk through balanced DIV tags.
level = 0
@ -103,6 +102,16 @@ def extract_text(html, starttag):
lyrics = lyrics.strip()
return lyrics
def _encode(s):
    """Percent-encode `s` so it can be embedded in a lyrics-site URL.

    Shared by the LyricsWiki and Lyrics.com URL builders. Unicode input
    has its "fancy" apostrophe (U+2019) swapped for a straight ASCII one
    and is then serialized as UTF-8 (unencodable characters are
    dropped); any other input is passed to the quoting step unchanged.
    """
    if not isinstance(s, unicode):
        # Not a unicode object: nothing to normalize, just quote it.
        return urllib.quote(s)

    # Swap the "fancy" (curly) apostrophe for a straight one before
    # turning the text into bytes.
    straightened = s.replace(u'\u2019', u"'")
    raw = straightened.encode('utf8', 'ignore')
    return urllib.quote(raw)
LYRICSWIKI_URL_PATTERN = 'http://lyrics.wikia.com/%s:%s'
def _lw_encode(s):
s = re.sub(r'\s+', '_', s)
@ -111,9 +120,7 @@ def _lw_encode(s):
s = s.replace("#", "Number_")
s = re.sub(r'[\[\{]', '(', s)
s = re.sub(r'[\]\}]', ')', s)
if isinstance(s, unicode):
s = s.encode('utf8', 'ignore')
return urllib.quote(s)
return _encode(s)
def fetch_lyricswiki(artist, title):
"""Fetch lyrics from LyricsWiki."""
url = LYRICSWIKI_URL_PATTERN % (_lw_encode(artist), _lw_encode(title))
@ -132,9 +139,7 @@ LYRICSCOM_NOT_FOUND = (
)
def _lc_encode(s):
    """Encode `s` for use as a component of a Lyrics.com URL.

    Every run of whitespace is replaced by a single hyphen, and the
    result is then handed to `_encode` for apostrophe normalization,
    UTF-8 encoding and URL quoting.
    """
    s = re.sub(r'\s+', '-', s)
    # Delegate to the shared helper rather than encoding and quoting
    # inline, so the apostrophe replacement is applied here as well.
    return _encode(s)
def fetch_lyricscom(artist, title):
"""Fetch lyrics from Lyrics.com."""
url = LYRICSCOM_URL_PATTERN % (_lc_encode(title), _lc_encode(artist))

View file

@ -22,6 +22,8 @@ It also adds some new features:
* :doc:`/plugins/inline`: Inline definitions can now contain statements or
blocks in addition to just expressions. Thanks to Florent Thoumie.
* :doc:`/plugins/lyrics`: Fix an issue where lyrics could not be found when
metadata contained "real" (typographic) apostrophes.
1.0.0 (in development)
----------------------