From a0ef886801de8dcc179c8be3f66b6862fcd13af1 Mon Sep 17 00:00:00 2001
From: Adrian Sampson
Date: Sun, 12 May 2013 12:45:49 -0700
Subject: [PATCH] lyrics: substitute more punctuation (fixes #270)

---
 beetsplug/lyrics.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 8b480ce5c..0f4a102bf 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -36,6 +36,20 @@ DIV_RE = re.compile(r'<(/?)div>?')
 COMMENT_RE = re.compile(r'<!--.*-->', re.S)
 TAG_RE = re.compile(r'<[^>]*>')
 BREAK_RE = re.compile(r'<br\s*/?>')
+URL_CHARACTERS = {
+    u'\u2018': u"'",
+    u'\u2019': u"'",
+    u'\u201c': u'"',
+    u'\u201d': u'"',
+    u'\u2010': u'-',
+    u'\u2011': u'-',
+    u'\u2012': u'-',
+    u'\u2013': u'-',
+    u'\u2014': u'-',
+    u'\u2015': u'-',
+    u'\u2016': u'-',
+    u'\u2026': u'...',
+}
 
 # Utilities.
 
@@ -115,8 +129,8 @@ def _encode(s):
     LyricsWiki and Lyrics.com).
     """
     if isinstance(s, unicode):
-        # Replace "fancy" apostrophes with straight ones.
-        s = s.replace(u'\u2019', u"'")
+        for char, repl in URL_CHARACTERS.items():
+            s = s.replace(char, repl)
         s = s.encode('utf8', 'ignore')
     return urllib.quote(s)
 