Merge pull request #4546 from luharder/master

Force Tekstowo to check result for match, added Tekstowo test suite
This commit is contained in:
Adrian Sampson 2022-11-26 19:17:19 -08:00 committed by GitHub
commit 7cfb55b746
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 3802 additions and 2 deletions

View file

@ -51,6 +51,7 @@ except ImportError:
class HTMLParseError(Exception):
pass
from beets.autotag.hooks import string_dist
from beets import plugins
from beets import ui
import beets
@ -233,6 +234,7 @@ class Backend:
def __init__(self, config, log):
self._log = log
self.config = config
@staticmethod
def _encode(s):
@ -461,7 +463,7 @@ class Tekstowo(Backend):
if not song_page_html:
return None
return self.extract_lyrics(song_page_html)
return self.extract_lyrics(song_page_html, artist, title)
def parse_search_results(self, html):
html = _scrape_strip_cruft(html)
@ -493,7 +495,7 @@ class Tekstowo(Backend):
return self.BASE_URL + link.get('href')
def extract_lyrics(self, html):
def extract_lyrics(self, html, artist, title):
html = _scrape_strip_cruft(html)
html = _scrape_merge_paragraphs(html)
@ -501,6 +503,24 @@ class Tekstowo(Backend):
if not soup:
return None
info_div = soup.find("div", class_="col-auto")
if not info_div:
return None
info_elements = info_div.find_all("a")
if not info_elements:
return None
html_title = info_elements[-1].get_text()
html_artist = info_elements[-2].get_text()
title_dist = string_dist(html_title, title)
artist_dist = string_dist(html_artist, artist)
thresh = self.config['dist_thresh'].get(float)
if title_dist > thresh or artist_dist > thresh:
return None
lyrics_div = soup.select("div.song-text > div.inner-text")
if not lyrics_div:
return None
@ -724,6 +744,7 @@ class LyricsPlugin(plugins.BeetsPlugin):
'force': False,
'local': False,
'sources': self.SOURCES,
'dist_thresh': 0.1,
})
self.config['bing_client_secret'].redact = True
self.config['google_API_key'].redact = True

3
docs/changelog.rst Normal file → Executable file
View file

@ -119,6 +119,9 @@ Bug fixes:
* :doc:`/plugins/lastgenre`: Fix a duplicated entry for trip hop in the
default genre list.
:bug:`4510`
* :doc:`/plugins/lyrics`: Fix the Tekstowo backend returning lyrics without
checking that the found song matches the requested artist and title.
:bug:`4406`
For packagers:

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

View file

@ -34,6 +34,7 @@ log = logging.getLogger('beets.test_lyrics')
raw_backend = lyrics.Backend({}, log)
google = lyrics.Google(MagicMock(), log)
genius = lyrics.Genius(MagicMock(), log)
tekstowo = lyrics.Tekstowo(MagicMock(), log)
class LyricsPluginTest(unittest.TestCase):
@ -209,6 +210,7 @@ class LyricsPluginTest(unittest.TestCase):
def url_to_filename(url):
url = re.sub(r'https?://|www.', '', url)
url = re.sub(r'.html', '', url)
fn = "".join(x for x in url if (x.isalnum() or x == '/'))
fn = fn.split('/')
fn = os.path.join(LYRICS_ROOT_DIR,
@ -522,6 +524,110 @@ class GeniusFetchTest(GeniusBaseTest):
# TODO: add integration test hitting real api
# test Tekstowo
class TekstowoBaseTest(unittest.TestCase):
    """Common base class for the Tekstowo backend tests."""

    def setUp(self):
        """Skip the running test when Beautiful Soup 4 cannot be imported."""
        try:
            __import__('bs4')
        except ImportError:
            bs4_missing = True
        else:
            bs4_missing = False

        if bs4_missing:
            self.skipTest('Beautiful Soup 4 not available')
class TekstowoExtractLyricsTest(TekstowoBaseTest):
    """Tests for Tekstowo.extract_lyrics()."""

    def setUp(self):
        """Create a plugin and attach its configuration to the backend."""
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()
        # extract_lyrics() reads the 'dist_thresh' option from the backend's
        # config, so the shared backend needs the plugin's real configuration
        # rather than the MagicMock it was constructed with.
        tekstowo.config = self.plugin.config

    def test_good_lyrics(self):
        """Ensure we are able to scrape a page with lyrics."""
        url = 'https://www.tekstowo.pl/piosenka,24kgoldn,city_of_angels_1.html'
        mock = MockFetchUrl()
        self.assertIsNotNone(tekstowo.extract_lyrics(mock(url),
                                                     '24kGoldn',
                                                     'City of Angels'))

    def test_no_lyrics(self):
        """Ensure we don't crash when the HTML scraped from a Tekstowo page
        doesn't contain lyrics.
        """
        url = 'https://www.tekstowo.pl/piosenka,beethoven,' \
              'beethoven_piano_sonata_17_tempest_the_3rd_movement.html'
        mock = MockFetchUrl()
        # The two literals are joined by implicit concatenation, so the
        # separating space has to live inside one of them.
        title = ('Beethoven Piano Sonata 17 '
                 'Tempest The 3rd Movement')
        self.assertIsNone(
            tekstowo.extract_lyrics(mock(url), 'Beethoven', title))

    def test_song_no_match(self):
        """Ensure we return None when a song does not match the search query.
        """
        # https://github.com/beetbox/beets/issues/4406
        # expected return value None
        url = 'https://www.tekstowo.pl/piosenka,bailey_bigger' \
              ',black_eyed_susan.html'
        mock = MockFetchUrl()
        self.assertIsNone(tekstowo.extract_lyrics(mock(url), 'Kelly Bailey',
                                                  'Black Mesa Inbound'))
class TekstowoParseSearchResultsTest(TekstowoBaseTest):
    """Tests for Tekstowo.parse_search_results()."""

    def setUp(self):
        """Run the base setup and create a plugin instance."""
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()

    def test_multiple_results(self):
        """Ensure we are able to scrape a page with multiple search results."""
        url = 'https://www.tekstowo.pl/szukaj,wykonawca,juice+wrld' \
              ',tytul,lucid+dreams.html'
        mock = MockFetchUrl()
        self.assertEqual(tekstowo.parse_search_results(mock(url)),
                         'http://www.tekstowo.pl/piosenka,juice_wrld,'
                         'lucid_dreams__remix__ft__lil_uzi_vert.html')

    def test_no_results(self):
        """Ensure we are able to scrape a page with no search results."""
        url = 'https://www.tekstowo.pl/szukaj,wykonawca,' \
              'agfdgja,tytul,agfdgafg.html'
        mock = MockFetchUrl()
        # assertIsNone matches the assertIsNotNone style used by the other
        # Tekstowo tests and reports a clearer failure than comparing to None.
        self.assertIsNone(tekstowo.parse_search_results(mock(url)))
class TekstowoIntegrationTest(TekstowoBaseTest, LyricsAssertions):
    """Tests the Tekstowo lyrics source with real requests."""

    # Both tests need network access, so they only run when the
    # INTEGRATION_TEST environment variable is set to '1'.
    _integration_only = unittest.skipUnless(
        os.environ.get('INTEGRATION_TEST', '0') == '1',
        'integration testing not enabled')

    def setUp(self):
        """Create a plugin and attach its configuration to the backend."""
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()
        tekstowo.config = self.plugin.config

    @_integration_only
    def test_normal(self):
        """Ensure we can fetch a song's lyrics in the ordinary case."""
        # Named `fetched` so the local does not shadow the `lyrics` module.
        fetched = tekstowo.fetch('Boy in Space', 'u n eye')
        self.assertLyricsContentOk('u n eye', fetched)

    @_integration_only
    def test_no_matching_results(self):
        """Ensure we fetch nothing if there are search results
        returned but no matches.
        """
        # https://github.com/beetbox/beets/issues/4406
        # expected return value None
        fetched = tekstowo.fetch('Kelly Bailey', 'Black Mesa Inbound')
        self.assertIsNone(fetched)
# test utilities
class SlugTests(unittest.TestCase):