diff --git a/test/lyrics_sources.py b/test/lyrics_sources.py index c3b185a27..8f7c57e9e 100644 --- a/test/lyrics_sources.py +++ b/test/lyrics_sources.py @@ -46,6 +46,7 @@ class MockFetchUrl(object): fn = "".join(x for x in url if (x.isalnum() or x == '/')) fn = fn.split('/') fn = os.path.join('rsrc', 'lyrics', fn[0], fn[-1]) + '.txt' + with open(fn, 'r') as f: content = f.read() return content @@ -60,7 +61,7 @@ def is_lyrics_content_ok(title, text): # consider lyrics ok if they share 50% or more with the reference if len(setinter): ratio = 1.0 * max(len(setexpected), len(settext)) / len(setinter) - return (ratio > .5 and ratio < 2) + return (ratio > .5 and ratio < 2.5) return False @@ -70,51 +71,71 @@ class LyricsSourcesPluginTest(unittest.TestCase): # Use default query when possible, or override artist and title field # if website don't have lyrics for default query. sourcesOk = [ - # dict(definfo, url='http://www.songlyrics.com', - # path=u'/the-beatles/lady-madonna-lyrics'), - # dict(definfo, url=u'http://www.elyricsworld.com', - # path=u'/lady_madonna_lyrics_beatles.html'), - dict(artist=u'Beres Hammond', title=u'I could beat myself', - url=u'http://www.reggaelyrics.info', - path=u'/beres-hammond/i-could-beat-myself'), - dict(definfo, artist=u'Lilly Wood & the prick', title=u"Hey it's ok", - url=u'http://www.paroles.net/', - path=u'lilly-wood-the-prick/paroles-hey-it-s-ok'), - dict(definfo, artist=u'Amy Winehouse', title=u"Jazz'n'blues", - url=u'http://www.lyricsontop.com', - path=u'/amy-winehouse-songs/jazz-n-blues-lyrics.html'), - dict(definfo, url=u'http://www.sweetslyrics.com', - path=u'/761696.The%20Beatles%20-%20Lady%20Madonna.html'), - dict(definfo, url=u'http://www.absolutelyrics.com', + dict(definfo, + url=u'http://www.absolutelyrics.com', path=u'/lyrics/view/the_beatles/lady_madonna'), - dict(definfo, url=u'http://www.azlyrics.com/', + dict(definfo, + url=u'http://www.azlyrics.com', path=u'/lyrics/beatles/ladymadonna.html'), - dict(definfo, url=u'http://www.chartlyrics.com', + dict(definfo, + url=u'http://www.chartlyrics.com', path=u'/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx'), - dict(definfo, artist=u'Lilly Wood & the prick', title=u"Hey it's ok", - url=u'http://www.lyricsmania.com', - path=u'/hey_its_ok_lyrics_lilly_wood_and_the_prick.html'), - dict(definfo, url=u'http://www.lyrics007.com', + dict(definfo, + url=u'http://www.elyricsworld.com', + path=u'/lady_madonna_lyrics_beatles.html'), + dict(definfo, + url=u'http://www.lacoccinelle.net', + artist=u'Jacques Brel', title=u"Amsterdam", + path=u'/paroles-officielles/275679.html'), + dict(definfo, + url=u'http://www.lyrics007.com', path=u'/The%20Beatles%20Lyrics/Lady%20Madonna%20Lyrics.html'), - dict(definfo, url=u'http://www.smartlyrics.com', - path=u'/Song18148-The-Beatles-Lady-Madonna-lyrics.aspx'), - dict(definfo, url='http://www.releaselyrics.com', - path=u'/e35f/the-beatles-lady-madonna'), - dict(definfo, url='http://www.metrolyrics.com/', + dict(definfo, + url='http://www.lyrics.com/', + path=u'lady-madonna-lyrics-the-beatles.html'), + dict(definfo, + url='http://www.lyricsmania.com/', + path='lady_madonna_lyrics_the_beatles.html'), + dict(definfo, + url=u'http://www.lyrics.net', + path=u'/lyric/17547916'), + dict(definfo, + url=u'http://www.lyricsontop.com', + artist=u'Amy Winehouse', title=u"Jazz'n'blues", + path=u'/amy-winehouse-songs/jazz-n-blues-lyrics.html'), + dict(definfo, + url=u'http://lyrics.wikia.com/', + path=u'The_Beatles:Lady_Madonna'), + dict(definfo, + url='http://www.metrolyrics.com/', path='lady-madonna-lyrics-beatles.html'), + dict(definfo, + url=u'http://www.paroles.net/', + artist=u'Lilly Wood & the prick', title=u"Hey it's ok", + path=u'lilly-wood-the-prick/paroles-hey-it-s-ok'), + dict(definfo, + url=u'http://www.reggaelyrics.info', + artist=u'Beres Hammond', title=u'I could beat myself', + path=u'/beres-hammond/i-could-beat-myself'), + dict(definfo, + url='http://www.releaselyrics.com', + path=u'/e35f/the-beatles-lady-madonna'), + dict(definfo, + url=u'http://www.smartlyrics.com', + path=u'/Song18148-The-Beatles-Lady-Madonna-lyrics.aspx'), + dict(definfo, + url='http://www.songlyrics.com', + path=u'/the-beatles/lady-madonna-lyrics'), + dict(definfo, + url=u'http://www.stlyrics.com', + path=u'/songs/r/richiehavens48961/ladymadonna2069109.html'), + dict(definfo, + url=u'http://www.sweetslyrics.com', + path=u'/761696.The%20Beatles%20-%20Lady%20Madonna.html'), ] - # Websites that can't be scraped yet. - # The reason why the scraping fail is indicated before each source dict. - sourcesFail = [ - # Lyrics consist in multiple small

sections instead of a long one - #dict(definfo, artist=u'Lilly Wood & the prick', title=u"Hey it's ok", - # url=u'http://www.lacoccinelle.net', - # path=u'/paroles-officielles/550512.html'), - ] - def setUp(self): """Set up configuration""" lyrics.LyricsPlugin() @@ -134,23 +155,11 @@ class LyricsSourcesPluginTest(unittest.TestCase): def test_sources_ok(self): for s in self.sourcesOk: url = s['url'] + s['path'] + log.info('Trying to scrape lyrics from {0}'.format(url)) res = lyrics.scrape_lyrics_from_html(lyrics.fetch_url(url)) self.assertTrue(lyrics.is_lyrics(res), url) self.assertTrue(is_lyrics_content_ok(s['title'], res), url) - def test_sources_fail(self): - for s in self.sourcesFail: - url = s['url'] + s['path'] - html = lyrics.fetch_url(url) - res = lyrics.scrape_lyrics_from_html(html) - if lyrics.is_lyrics(res): - if is_lyrics_content_ok(s['title'], res): - log.info(u'{0} can be added to sources :\n{1}' - .format(s['url'], res)) - else: - log.info(u'{0} return invalid lyrics:\n{1}'. - format(s['url'], res)) - def test_is_page_candidate(self): for s in self.sourcesOk: url = unicode(s['url'] + s['path']) diff --git a/test/rsrc/lyrics/elyricsworldcom/ladymadonnalyricsbeatleshtml.txt b/test/rsrc/lyrics/elyricsworldcom/ladymadonnalyricsbeatleshtml.txt index c0a12f6c1..fe5b7d09c 100644 --- a/test/rsrc/lyrics/elyricsworldcom/ladymadonnalyricsbeatleshtml.txt +++ b/test/rsrc/lyrics/elyricsworldcom/ladymadonnalyricsbeatleshtml.txt @@ -267,6 +267,10 @@

@@ -440,7 +440,6 @@ click: function(score, evt) {
-
Embed Video
@@ -450,14 +449,6 @@ click: function(score, evt) {
-
Embed Widget