From 879fc8d3cac9de794d374111200edcd7b2c47d8e Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Wed, 24 Sep 2014 18:04:37 +0200 Subject: [PATCH] add LyricsGooglePluginTest to test_lyrics.py --- test/lyrics_sources.py | 178 ----------------------------------------- test/test_lyrics.py | 140 ++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 178 deletions(-) delete mode 100644 test/lyrics_sources.py diff --git a/test/lyrics_sources.py b/test/lyrics_sources.py deleted file mode 100644 index 96658998d..000000000 --- a/test/lyrics_sources.py +++ /dev/null @@ -1,178 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Fabrice Laporte. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Tests for the 'lyrics' plugin""" - -import os -import logging -import _common -from _common import unittest -from beetsplug import lyrics -from beets import config -from beets.util import confit -from bs4 import BeautifulSoup - -log = logging.getLogger('beets') -LYRICS_TEXTS = confit.load_yaml(os.path.join(_common.RSRC, 'lyricstext.yaml')) - -try: - googlekey = config['lyrics']['google_API_key'].get(unicode) -except confit.NotFoundError: - googlekey = None - -# default query for tests -definfo = dict(artist=u'The Beatles', title=u'Lady Madonna') - - -class MockFetchUrl(object): - def __init__(self, pathval='fetched_path'): - self.pathval = pathval - self.fetched = None - - def __call__(self, url, filename=None): - self.fetched = url - url = url.replace('http://', '').replace('www.', '') - fn = "".join(x for x in url if (x.isalnum() or x == '/')) - fn = fn.split('/') - fn = os.path.join('rsrc', 'lyrics', fn[0], fn[-1]) + '.txt' - - with open(fn, 'r') as f: - content = f.read() - return content - - -def is_lyrics_content_ok(title, text): - """Compare lyrics text to expected lyrics for given title""" - - setexpected = set(LYRICS_TEXTS[lyrics.slugify(title)].split()) - settext = set(text.split()) - setinter = setexpected.intersection(settext) - # consider lyrics ok if they share 50% or more with the reference - if len(setinter): - ratio = 1.0 * max(len(setexpected), len(settext)) / len(setinter) - return (ratio > .5 and ratio < 2.5) - return False - - -class LyricsSourcesPluginTest(unittest.TestCase): - # Every source entered in default beets google custom search engine - # must be listed below. - # Use default query when possible, or override artist and title field - # if website don't have lyrics for default query. - sourcesOk = [ - dict(definfo, - url=u'http://www.absolutelyrics.com', - path=u'/lyrics/view/the_beatles/lady_madonna'), - dict(definfo, - url=u'http://www.azlyrics.com', - path=u'/lyrics/beatles/ladymadonna.html'), - dict(definfo, - url=u'http://www.chartlyrics.com', - path=u'/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx'), - dict(definfo, - url=u'http://www.elyricsworld.com', - path=u'/lady_madonna_lyrics_beatles.html'), - dict(definfo, - url=u'http://www.lacoccinelle.net', - artist=u'Jacques Brel', title=u"Amsterdam", - path=u'/paroles-officielles/275679.html'), - dict(definfo, - url=u'http://www.lyrics007.com', - path=u'/The%20Beatles%20Lyrics/Lady%20Madonna%20Lyrics.html'), - dict(definfo, - url='http://www.lyrics.com/', - path=u'lady-madonna-lyrics-the-beatles.html'), - dict(definfo, - url='http://www.lyricsmania.com/', - path='lady_madonna_lyrics_the_beatles.html'), - dict(definfo, - url=u'http://www.lyrics.net', - path=u'/lyric/17547916'), - dict(definfo, - url=u'http://www.lyricsontop.com', - artist=u'Amy Winehouse', title=u"Jazz'n'blues", - path=u'/amy-winehouse-songs/jazz-n-blues-lyrics.html'), - dict(definfo, - url=u'http://lyrics.wikia.com/', - path=u'The_Beatles:Lady_Madonna'), - dict(definfo, - url='http://www.metrolyrics.com/', - path='lady-madonna-lyrics-beatles.html'), - dict(definfo, - url=u'http://www.paroles.net/', - artist=u'Lilly Wood & the prick', title=u"Hey it's ok", - path=u'lilly-wood-the-prick/paroles-hey-it-s-ok'), - dict(definfo, - url=u'http://www.reggaelyrics.info', - artist=u'Beres Hammond', title=u'I could beat myself', - path=u'/beres-hammond/i-could-beat-myself'), - dict(definfo, - url='http://www.releaselyrics.com', - path=u'/e35f/the-beatles-lady-madonna'), - dict(definfo, - url=u'http://www.smartlyrics.com', - path=u'/Song18148-The-Beatles-Lady-Madonna-lyrics.aspx'), - dict(definfo, - url='http://www.songlyrics.com', - path=u'/the-beatles/lady-madonna-lyrics'), - dict(definfo, - url=u'http://www.stlyrics.com', - path=u'/songs/r/richiehavens48961/ladymadonna2069109.html'), - dict(definfo, - url=u'http://www.sweetslyrics.com', - path=u'/761696.The%20Beatles%20-%20Lady%20Madonna.html'), - ] - - - def setUp(self): - """Set up configuration""" - lyrics.LyricsPlugin() - lyrics.fetch_url = MockFetchUrl() - - def test_default_ok(self): - """Test each lyrics engine with the default query""" - - for f in (lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom): - res = f(definfo['artist'], definfo['title']) - self.assertTrue(lyrics.is_lyrics(res)) - self.assertTrue(is_lyrics_content_ok(definfo['title'], res)) - - def test_missing_lyrics(self): - self.assertFalse(lyrics.is_lyrics(LYRICS_TEXTS['missing_texts'])) - - def test_sources_ok(self): - for s in self.sourcesOk: - url = s['url'] + s['path'] - log.info('Scraping lyrics from {0}'.format(url)) - res = lyrics.scrape_lyrics_from_html(lyrics.fetch_url(url)) - self.assertTrue(lyrics.is_lyrics(res), url) - self.assertTrue(is_lyrics_content_ok(s['title'], res), url) - - def test_is_page_candidate(self): - for s in self.sourcesOk: - url = unicode(s['url'] + s['path']) - html = lyrics.fetch_url(url) - soup = BeautifulSoup(html) - if not soup.title: - continue - self.assertEqual(lyrics.is_page_candidate(url, soup.title.string, - s['title'], s['artist']), - True, url) - - -def suite(): - return unittest.TestLoader().loadTestsFromName(__name__) - -if __name__ == '__main__': - unittest.main(defaultTest='suite') diff --git a/test/test_lyrics.py b/test/test_lyrics.py index ce53d7465..00bea692a 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -15,9 +15,13 @@ """Tests for the 'lyrics' plugin.""" +import os +import _common from _common import unittest from beetsplug import lyrics from beets.library import Item +from beets import config +from beets.util import confit class LyricsPluginTest(unittest.TestCase): @@ -145,6 +149,142 @@ class LyricsPluginTest(unittest.TestCase): self.assertEqual(lyrics._scrape_merge_paragraphs(text), "one\ntwo\nthree") + + + + +LYRICS_TEXTS = confit.load_yaml(os.path.join(_common.RSRC, 'lyricstext.yaml')) +definfo = dict(artist=u'The Beatles', title=u'Lady Madonna') # default query + + +class MockFetchUrl(object): + def __init__(self, pathval='fetched_path'): + self.pathval = pathval + self.fetched = None + + def __call__(self, url, filename=None): + self.fetched = url + url = url.replace('http://', '').replace('www.', '') + fn = "".join(x for x in url if (x.isalnum() or x == '/')) + fn = fn.split('/') + fn = os.path.join('rsrc', 'lyrics', fn[0], fn[-1]) + '.txt' + + with open(fn, 'r') as f: + content = f.read() + return content + + +def is_lyrics_content_ok(title, text): + """Compare lyrics text to expected lyrics for given title""" + + setexpected = set(LYRICS_TEXTS[lyrics.slugify(title)].split()) + settext = set(text.split()) + setinter = setexpected.intersection(settext) + # consider lyrics ok if they share 50% or more with the reference + if len(setinter): + ratio = 1.0 * max(len(setexpected), len(settext)) / len(setinter) + return (ratio > .5 and ratio < 2.5) + return False + + +class LyricsGooglePluginTest(unittest.TestCase): + # Every source entered in default beets google custom search engine + # must be listed below. + # Use default query when possible, or override artist and title field + # if website don't have lyrics for default query. + sourcesOk = [ + dict(definfo, + url=u'http://www.absolutelyrics.com', + path=u'/lyrics/view/the_beatles/lady_madonna'), + dict(definfo, + url=u'http://www.azlyrics.com', + path=u'/lyrics/beatles/ladymadonna.html'), + dict(definfo, + url=u'http://www.chartlyrics.com', + path=u'/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx'), + dict(definfo, + url=u'http://www.elyricsworld.com', + path=u'/lady_madonna_lyrics_beatles.html'), + dict(definfo, + url=u'http://www.lacoccinelle.net', + artist=u'Jacques Brel', title=u"Amsterdam", + path=u'/paroles-officielles/275679.html'), + dict(definfo, + url=u'http://www.lyrics007.com', + path=u'/The%20Beatles%20Lyrics/Lady%20Madonna%20Lyrics.html'), + dict(definfo, + url='http://www.lyrics.com/', + path=u'lady-madonna-lyrics-the-beatles.html'), + dict(definfo, + url='http://www.lyricsmania.com/', + path='lady_madonna_lyrics_the_beatles.html'), + dict(definfo, + url=u'http://www.lyrics.net', + path=u'/lyric/17547916'), + dict(definfo, + url=u'http://www.lyricsontop.com', + artist=u'Amy Winehouse', title=u"Jazz'n'blues", + path=u'/amy-winehouse-songs/jazz-n-blues-lyrics.html'), + dict(definfo, + url=u'http://lyrics.wikia.com/', + path=u'The_Beatles:Lady_Madonna'), + dict(definfo, + url='http://www.metrolyrics.com/', + path='lady-madonna-lyrics-beatles.html'), + dict(definfo, + url=u'http://www.paroles.net/', + artist=u'Lilly Wood & the prick', title=u"Hey it's ok", + path=u'lilly-wood-the-prick/paroles-hey-it-s-ok'), + dict(definfo, + url=u'http://www.reggaelyrics.info', + artist=u'Beres Hammond', title=u'I could beat myself', + path=u'/beres-hammond/i-could-beat-myself'), + dict(definfo, + url='http://www.releaselyrics.com', + path=u'/e35f/the-beatles-lady-madonna'), + dict(definfo, + url=u'http://www.smartlyrics.com', + path=u'/Song18148-The-Beatles-Lady-Madonna-lyrics.aspx'), + dict(definfo, + url='http://www.songlyrics.com', + path=u'/the-beatles/lady-madonna-lyrics'), + dict(definfo, + url=u'http://www.stlyrics.com', + path=u'/songs/r/richiehavens48961/ladymadonna2069109.html'), + dict(definfo, + url=u'http://www.sweetslyrics.com', + path=u'/761696.The%20Beatles%20-%20Lady%20Madonna.html')] + + def setUp(self): + """Set up configuration""" + + try: + __import__('bs4') + except ImportError: + self.skipTest('Beautiful Soup 4 not available') + + lyrics.LyricsPlugin() + lyrics.fetch_url = MockFetchUrl() + + def test_default_ok(self): + """Test each lyrics engine with the default query""" + + for f in (lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom): + res = f(definfo['artist'], definfo['title']) + self.assertTrue(lyrics.is_lyrics(res)) + self.assertTrue(is_lyrics_content_ok(definfo['title'], res)) + + def test_missing_lyrics(self): + self.assertFalse(lyrics.is_lyrics(LYRICS_TEXTS['missing_texts'])) + + def test_sources_ok(self): + for s in self.sourcesOk: + url = s['url'] + s['path'] + res = lyrics.scrape_lyrics_from_html(lyrics.fetch_url(url)) + self.assertTrue(lyrics.is_lyrics(res), url) + self.assertTrue(is_lyrics_content_ok(s['title'], res), url) + + def suite(): return unittest.TestLoader().loadTestsFromName(__name__)