bug 4406 fix

This commit is contained in:
Luke Harder 2022-11-10 12:54:45 -05:00
parent e201dd4fe5
commit 252121f537
9 changed files with 3797 additions and 2 deletions

View file

@ -51,6 +51,7 @@ except ImportError:
class HTMLParseError(Exception):
pass
from beets.autotag.hooks import string_dist
from beets import plugins
from beets import ui
import beets
@ -461,7 +462,7 @@ class Tekstowo(Backend):
if not song_page_html:
return None
return self.extract_lyrics(song_page_html)
return self.extract_lyrics(song_page_html, artist, title)
def parse_search_results(self, html):
html = _scrape_strip_cruft(html)
@ -493,7 +494,7 @@ class Tekstowo(Backend):
return self.BASE_URL + link.get('href')
def extract_lyrics(self, html):
def extract_lyrics(self, html, artist, title):
html = _scrape_strip_cruft(html)
html = _scrape_merge_paragraphs(html)
@ -501,6 +502,23 @@ class Tekstowo(Backend):
if not soup:
return None
info_div = soup.find("div", class_="col-auto")
if not info_div:
return None
info_elements = info_div.find_all("a")
if not info_elements:
return None
html_title = info_elements[-1].get_text()
html_artist = info_elements[-2].get_text()
title_dist = string_dist(html_title, title)
artist_dist = string_dist(html_artist, artist)
if title_dist > 0.1 or artist_dist > 0.1:
return None
lyrics_div = soup.select("div.song-text > div.inner-text")
if not lyrics_div:
return None

3
docs/changelog.rst Normal file → Executable file
View file

@ -117,6 +117,9 @@ Bug fixes:
* :doc:`/plugins/lastgenre`: Fix a duplicated entry for trip hop in the
default genre list.
:bug:`4510`
* :doc:`/plugins/lyrics`: Fixed an issue where the Tekstowo backend did not
check whether the found song matches the requested artist and title.
:bug:`4406`
For packagers:

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

View file

@ -34,6 +34,7 @@ log = logging.getLogger('beets.test_lyrics')
raw_backend = lyrics.Backend({}, log)
google = lyrics.Google(MagicMock(), log)
genius = lyrics.Genius(MagicMock(), log)
tekstowo = lyrics.Tekstowo(MagicMock(), log)
class LyricsPluginTest(unittest.TestCase):
@ -209,6 +210,7 @@ class LyricsPluginTest(unittest.TestCase):
def url_to_filename(url):
url = re.sub(r'https?://|www.', '', url)
url = re.sub(r'.html', '', url)
fn = "".join(x for x in url if (x.isalnum() or x == '/'))
fn = fn.split('/')
fn = os.path.join(LYRICS_ROOT_DIR,
@ -522,6 +524,108 @@ class GeniusFetchTest(GeniusBaseTest):
# TODO: add integration test hitting real api
# test Tekstowo
class TekstowoBaseTest(unittest.TestCase):
    """Shared base class for the Tekstowo test cases."""

    def setUp(self):
        """Skip the running test when Beautiful Soup 4 cannot be imported."""
        try:
            # Imported purely to probe availability; the name is never used.
            import bs4  # noqa: F401
        except ImportError:
            self.skipTest('Beautiful Soup 4 not available')
class TekstowoExtractLyricsTest(TekstowoBaseTest):
    """Tests for Tekstowo.extract_lyrics()."""

    def setUp(self):
        """Set up configuration."""
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()

    def test_good_lyrics(self):
        """Ensure we are able to scrape a page with lyrics."""
        url = 'https://www.tekstowo.pl/piosenka,24kgoldn,city_of_angels_1.html'
        mock = MockFetchUrl()
        self.assertIsNotNone(
            tekstowo.extract_lyrics(mock(url), '24kGoldn', 'City of Angels')
        )

    def test_no_lyrics(self):
        """Ensure we don't crash when the HTML scraped from a Tekstowo page
        doesn't contain lyrics.
        """
        url = 'https://www.tekstowo.pl/piosenka,beethoven,' \
              'beethoven_piano_sonata_17_tempest_the_3rd_movement.html'
        mock = MockFetchUrl()
        # The trailing space after "17" matters: adjacent string literals are
        # concatenated verbatim, so without it the title would read
        # "...Sonata 17Tempest...".
        title = ('Beethoven Piano Sonata 17 '
                 'Tempest The 3rd Movement')
        self.assertIsNone(
            tekstowo.extract_lyrics(mock(url), 'Beethoven', title)
        )

    def test_song_no_match(self):
        """Ensure we return None when a song does not match the search query."""
        # https://github.com/beetbox/beets/issues/4406
        # expected return value None
        url = 'https://www.tekstowo.pl/piosenka,bailey_bigger' \
              ',black_eyed_susan.html'
        mock = MockFetchUrl()
        self.assertIsNone(
            tekstowo.extract_lyrics(mock(url), 'Kelly Bailey',
                                    'Black Mesa Inbound')
        )
class TekstowoParseSearchResultsTest(TekstowoBaseTest):
    """Tests for Tekstowo.parse_search_results()."""

    def setUp(self):
        """Run the shared base setup, then create the plugin instance."""
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()

    def test_multiple_results(self):
        """A search page listing several hits yields the expected song URL."""
        search_url = ('https://www.tekstowo.pl/szukaj,wykonawca,juice+wrld'
                      ',tytul,lucid+dreams.html')
        expected_link = ('http://www.tekstowo.pl/piosenka,juice_wrld,'
                         'lucid_dreams__remix__ft__lil_uzi_vert.html')
        fetch = MockFetchUrl()
        found = tekstowo.parse_search_results(fetch(search_url))
        self.assertEqual(found, expected_link)

    def test_no_results(self):
        """A search page without any hits yields None."""
        search_url = ('https://www.tekstowo.pl/szukaj,wykonawca,'
                      'agfdgja,tytul,agfdgafg.html')
        fetch = MockFetchUrl()
        found = tekstowo.parse_search_results(fetch(search_url))
        self.assertEqual(found, None)
class TekstowoIntegrationTest(TekstowoBaseTest, LyricsAssertions):
    """Tests Tekstowo lyric source with real requests.

    Both tests only run when the INTEGRATION_TEST environment variable is
    set to '1'.
    """

    def setUp(self):
        """Set up configuration."""
        # Derive from (and initialise through) the Tekstowo base class, in
        # line with the other Tekstowo test cases, rather than the Genius one.
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()

    @unittest.skipUnless(
        os.environ.get('INTEGRATION_TEST', '0') == '1',
        'integration testing not enabled')
    def test_normal(self):
        """Ensure we can fetch a song's lyrics in the ordinary case."""
        # Named `fetched` so the `lyrics` module is not shadowed locally.
        fetched = tekstowo.fetch('Boy in Space', 'u n eye')
        self.assertLyricsContentOk('u n eye', fetched)

    @unittest.skipUnless(
        os.environ.get('INTEGRATION_TEST', '0') == '1',
        'integration testing not enabled')
    def test_no_matching_results(self):
        """Ensure we fetch nothing if there are search results
        returned but no matches.
        """
        # https://github.com/beetbox/beets/issues/4406
        # expected return value None
        fetched = tekstowo.fetch('Kelly Bailey', 'Black Mesa Inbound')
        self.assertIsNone(fetched)
# test utilities
class SlugTests(unittest.TestCase):