mirror of
https://github.com/beetbox/beets.git
synced 2025-12-16 05:34:47 +01:00
bug 4406 fix
This commit is contained in:
parent
e201dd4fe5
commit
252121f537
9 changed files with 3797 additions and 2 deletions
|
|
@ -51,6 +51,7 @@ except ImportError:
|
|||
class HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
from beets.autotag.hooks import string_dist
|
||||
from beets import plugins
|
||||
from beets import ui
|
||||
import beets
|
||||
|
|
@ -461,7 +462,7 @@ class Tekstowo(Backend):
|
|||
if not song_page_html:
|
||||
return None
|
||||
|
||||
return self.extract_lyrics(song_page_html)
|
||||
return self.extract_lyrics(song_page_html, artist, title)
|
||||
|
||||
def parse_search_results(self, html):
|
||||
html = _scrape_strip_cruft(html)
|
||||
|
|
@ -493,7 +494,7 @@ class Tekstowo(Backend):
|
|||
|
||||
return self.BASE_URL + link.get('href')
|
||||
|
||||
def extract_lyrics(self, html):
|
||||
def extract_lyrics(self, html, artist, title):
|
||||
html = _scrape_strip_cruft(html)
|
||||
html = _scrape_merge_paragraphs(html)
|
||||
|
||||
|
|
@ -501,6 +502,23 @@ class Tekstowo(Backend):
|
|||
if not soup:
|
||||
return None
|
||||
|
||||
info_div = soup.find("div", class_="col-auto")
|
||||
if not info_div:
|
||||
return None
|
||||
|
||||
info_elements = info_div.find_all("a")
|
||||
if not info_elements:
|
||||
return None
|
||||
|
||||
html_title = info_elements[-1].get_text()
|
||||
html_artist = info_elements[-2].get_text()
|
||||
|
||||
title_dist = string_dist(html_title, title)
|
||||
artist_dist = string_dist(html_artist, artist)
|
||||
|
||||
if title_dist > 0.1 or artist_dist > 0.1:
|
||||
return None
|
||||
|
||||
lyrics_div = soup.select("div.song-text > div.inner-text")
|
||||
if not lyrics_div:
|
||||
return None
|
||||
|
|
|
|||
3
docs/changelog.rst
Normal file → Executable file
3
docs/changelog.rst
Normal file → Executable file
|
|
@ -117,6 +117,9 @@ Bug fixes:
|
|||
* :doc:`/plugins/lastgenre`: Fix a duplicated entry for trip hop in the
|
||||
default genre list.
|
||||
:bug:`4510`
|
||||
* :doc:`/plugins/lyrics`: Fix the Tekstowo backend returning lyrics without
|
||||
checking that the found song matches the requested artist and title.
|
||||
:bug:`4406`
|
||||
|
||||
For packagers:
|
||||
|
||||
|
|
|
|||
910
test/rsrc/lyrics/tekstowopl/piosenka24kgoldncityofangels1.txt
Executable file
910
test/rsrc/lyrics/tekstowopl/piosenka24kgoldncityofangels1.txt
Executable file
File diff suppressed because one or more lines are too long
888
test/rsrc/lyrics/tekstowopl/piosenkabaileybiggerblackeyedsusan.txt
Executable file
888
test/rsrc/lyrics/tekstowopl/piosenkabaileybiggerblackeyedsusan.txt
Executable file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
537
test/rsrc/lyrics/tekstowopl/szukajwykonawcaagfdgjatytulagfdgafg.txt
Executable file
537
test/rsrc/lyrics/tekstowopl/szukajwykonawcaagfdgjatytulagfdgafg.txt
Executable file
File diff suppressed because one or more lines are too long
584
test/rsrc/lyrics/tekstowopl/szukajwykonawcajuicewrldtytulluciddreams.txt
Executable file
584
test/rsrc/lyrics/tekstowopl/szukajwykonawcajuicewrldtytulluciddreams.txt
Executable file
File diff suppressed because one or more lines are too long
Binary file not shown.
|
|
@ -34,6 +34,7 @@ log = logging.getLogger('beets.test_lyrics')
|
|||
raw_backend = lyrics.Backend({}, log)
|
||||
google = lyrics.Google(MagicMock(), log)
|
||||
genius = lyrics.Genius(MagicMock(), log)
|
||||
tekstowo = lyrics.Tekstowo(MagicMock(), log)
|
||||
|
||||
|
||||
class LyricsPluginTest(unittest.TestCase):
|
||||
|
|
@ -209,6 +210,7 @@ class LyricsPluginTest(unittest.TestCase):
|
|||
|
||||
def url_to_filename(url):
|
||||
url = re.sub(r'https?://|www.', '', url)
|
||||
url = re.sub(r'.html', '', url)
|
||||
fn = "".join(x for x in url if (x.isalnum() or x == '/'))
|
||||
fn = fn.split('/')
|
||||
fn = os.path.join(LYRICS_ROOT_DIR,
|
||||
|
|
@ -522,6 +524,108 @@ class GeniusFetchTest(GeniusBaseTest):
|
|||
# TODO: add integration test hitting real api
|
||||
|
||||
|
||||
# test Tekstowo
|
||||
|
||||
class TekstowoBaseTest(unittest.TestCase):
    """Shared base class for the Tekstowo backend tests."""

    def setUp(self):
        """Skip the running test when Beautiful Soup 4 cannot be imported."""
        bs4_available = True
        try:
            __import__('bs4')
        except ImportError:
            bs4_available = False

        if not bs4_available:
            self.skipTest('Beautiful Soup 4 not available')
|
||||
|
||||
|
||||
class TekstowoExtractLyricsTest(TekstowoBaseTest):
    """Tests for Tekstowo.extract_lyrics()."""

    def setUp(self):
        """Run the base-class set-up and instantiate the lyrics plugin."""
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()

    def test_good_lyrics(self):
        """Ensure we are able to scrape a page with lyrics."""
        url = 'https://www.tekstowo.pl/piosenka,24kgoldn,city_of_angels_1.html'
        mock = MockFetchUrl()
        self.assertIsNotNone(tekstowo.extract_lyrics(mock(url),
                                                     '24kGoldn',
                                                     'City of Angels'))

    def test_no_lyrics(self):
        """Ensure we don't crash when the HTML scraped from a Tekstowo page
        doesn't contain lyrics.
        """
        url = 'https://www.tekstowo.pl/piosenka,beethoven,' \
              'beethoven_piano_sonata_17_tempest_the_3rd_movement.html'
        mock = MockFetchUrl()
        # The trailing space matters: adjacent string literals are joined
        # verbatim, and without it the title would read "...17Tempest...".
        title = ('Beethoven Piano Sonata 17 '
                 'Tempest The 3rd Movement')
        self.assertIsNone(tekstowo.extract_lyrics(mock(url), 'Beethoven',
                                                  title))

    def test_song_no_match(self):
        """Ensure we return None when a song does not match the search query.
        """
        # https://github.com/beetbox/beets/issues/4406
        # expected return value None
        url = 'https://www.tekstowo.pl/piosenka,bailey_bigger' \
              ',black_eyed_susan.html'
        mock = MockFetchUrl()
        self.assertIsNone(tekstowo.extract_lyrics(mock(url), 'Kelly Bailey',
                                                  'Black Mesa Inbound'))
|
||||
|
||||
|
||||
class TekstowoParseSearchResultsTest(TekstowoBaseTest):
    """Tests for Tekstowo.parse_search_results()."""

    def setUp(self):
        """Run the base-class set-up and instantiate the lyrics plugin."""
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()

    def test_multiple_results(self):
        """A search page listing several hits yields the expected song URL."""
        url = ('https://www.tekstowo.pl/szukaj,wykonawca,juice+wrld'
               ',tytul,lucid+dreams.html')
        expected_link = ('http://www.tekstowo.pl/piosenka,juice_wrld,'
                         'lucid_dreams__remix__ft__lil_uzi_vert.html')
        mock = MockFetchUrl()
        search_page = mock(url)
        self.assertEqual(tekstowo.parse_search_results(search_page),
                         expected_link)

    def test_no_results(self):
        """A search page without any hits yields None."""
        url = ('https://www.tekstowo.pl/szukaj,wykonawca,'
               'agfdgja,tytul,agfdgafg.html')
        mock = MockFetchUrl()
        search_page = mock(url)
        self.assertEqual(tekstowo.parse_search_results(search_page), None)
|
||||
|
||||
|
||||
class TekstowoIntegrationTest(TekstowoBaseTest, LyricsAssertions):
    """Tests Tekstowo lyric source with real requests.

    The tests only run when the INTEGRATION_TEST environment variable is
    set to '1'; otherwise they are skipped.
    """

    def setUp(self):
        """Run the base-class set-up and instantiate the lyrics plugin."""
        TekstowoBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()

    @unittest.skipUnless(
        os.environ.get('INTEGRATION_TEST', '0') == '1',
        'integration testing not enabled')
    def test_normal(self):
        """Ensure we can fetch a song's lyrics in the ordinary case."""
        # Named `fetched` so the local does not shadow the `lyrics` module.
        fetched = tekstowo.fetch('Boy in Space', 'u n eye')
        self.assertLyricsContentOk('u n eye', fetched)

    @unittest.skipUnless(
        os.environ.get('INTEGRATION_TEST', '0') == '1',
        'integration testing not enabled')
    def test_no_matching_results(self):
        """Ensure we fetch nothing if there are search results
        returned but no matches.
        """
        # https://github.com/beetbox/beets/issues/4406
        # expected return value None
        fetched = tekstowo.fetch('Kelly Bailey', 'Black Mesa Inbound')
        self.assertIsNone(fetched)
|
||||
|
||||
|
||||
# test utilities
|
||||
|
||||
class SlugTests(unittest.TestCase):
|
||||
|
|
|
|||
Loading…
Reference in a new issue