lyrics: isolate test configuration

(#5102) Refactor lyrics tests which depended on local developer beets
configuration.
This commit is contained in:
Šarūnas Nejus 2025-01-12 20:23:38 +00:00
parent 29a3dd5084
commit b9bc2cbc04
No known key found for this signature in database
GPG key ID: DD28F6704DBE3435
2 changed files with 146 additions and 198 deletions

View file

@ -41,6 +41,10 @@ Bug fixes:
* Fix ambiguous column name ``sqlite3.OperationalError`` that occured in album
queries that filtered album track titles, for example ``beet list -a keyword
title:foo``.
* :doc:`plugins/lyrics`: Rewrite lyrics tests using pytest to provide isolated
configuration for each test case. This fixes the issue where some tests
failed because they read developers' local lyrics configuration.
:bug:`5133`
For packagers:

View file

@ -18,14 +18,15 @@ import itertools
import os
import re
import unittest
from unittest.mock import MagicMock, patch
from unittest.mock import patch
import pytest
import requests
from bs4 import BeautifulSoup, SoupStrainer
from beets import logging
from beets.library import Item
from beets.test import _common
from beets.test.helper import PluginMixin
from beets.util import bytestring_path
from beetsplug import lyrics
@ -35,13 +36,6 @@ PHRASE_BY_TITLE = {
"Beets song": "via plugins, beets becomes a panacea",
}
log = logging.getLogger("beets.test_lyrics")
raw_backend = lyrics.Backend({}, log)
google = lyrics.Google(MagicMock(), log)
genius = lyrics.Genius(MagicMock(), log)
tekstowo = lyrics.Tekstowo(MagicMock(), log)
lrclib = lyrics.LRCLib(MagicMock(), log)
def xfail_on_ci(msg: str) -> pytest.MarkDecorator:
return pytest.mark.xfail(
@ -159,19 +153,6 @@ class LyricsPluginTest(unittest.TestCase):
if the beat aint crackin"""
assert lyrics.remove_credits(text) == text
def test_is_lyrics(self):
texts = ["LyricsMania.com - Copyright (c) 2013 - All Rights Reserved"]
texts += [
"""All material found on this site is property\n
of mywickedsongtext brand"""
]
for t in texts:
assert not google.is_lyrics(t)
def test_slugify(self):
text = "http://site.com/\xe7afe-au_lait(boisson)"
assert google.slugify(text) == "http://site.com/cafe_au_lait"
def test_scrape_strip_cruft(self):
text = """<!--lyrics below-->
&nbsp;one
@ -193,15 +174,6 @@ class LyricsPluginTest(unittest.TestCase):
text = "one</p> <p class='myclass'>two</p><p>three"
assert lyrics._scrape_merge_paragraphs(text) == "one\ntwo\nthree"
def test_missing_lyrics(self):
lyrics = """
Lyricsmania staff is working hard for you to add $TITLE lyrics as soon
as they'll be released by $ARTIST, check back soon!
In case you have the lyrics to $TITLE and want to send them to us, fill out
the following form.
"""
assert not google.is_lyrics(lyrics)
def url_to_filename(url):
url = re.sub(r"https?://|www.", "", url)
@ -232,17 +204,55 @@ class MockFetchUrl:
LYRICS_ROOT_DIR = os.path.join(_common.RSRC, b"lyrics")
class LyricsGoogleBaseTest(unittest.TestCase):
def setUp(self):
"""Set up configuration."""
try:
__import__("bs4")
except ImportError:
self.skipTest("Beautiful Soup 4 not available")
class LyricsBackendTest(PluginMixin):
plugin = "lyrics"
@pytest.fixture
def plugin_config(self):
"""Return lyrics configuration to test."""
return {}
@pytest.fixture
def backend(self, backend_name, plugin_config):
"""Set configuration and returns the backend instance."""
plugin_config["sources"] = [backend_name]
self.config[self.plugin].set(plugin_config)
lyrics_plugin = lyrics.LyricsPlugin()
return lyrics_plugin.backends[0]
@pytest.mark.integration_test
def test_backend_source(self, backend):
"""Test default backends with a song known to exist in respective
databases.
"""
title = "Lady Madonna"
res = backend.fetch("The Beatles", title)
assert res
assert PHRASE_BY_TITLE[title] in res.lower()
@pytest.mark.integration_test
class TestSources:
class TestGoogleLyrics(LyricsBackendTest):
"""Test scraping heuristics on a fake html page."""
source = dict(
url="http://www.example.com",
artist="John Doe",
title="Beets song",
path="/lyrics/beetssong",
)
@pytest.fixture(scope="class")
def backend_name(self):
return "google"
@pytest.fixture(scope="class")
def plugin_config(self):
return {"google_API_key": "test"}
@pytest.mark.integration_test
@pytest.mark.parametrize(
"title, url",
[
@ -272,82 +282,40 @@ class TestSources:
),
],
)
def test_google_source(self, title, url):
def test_backend_source(self, backend, title, url):
"""Test if lyrics present on websites registered in beets google custom
search engine are correctly scraped.
"""
response = raw_backend.fetch_url(url)
response = backend.fetch_url(url)
result = lyrics.scrape_lyrics_from_html(response).lower()
assert google.is_lyrics(result)
assert backend.is_lyrics(result)
assert PHRASE_BY_TITLE[title] in result
@pytest.mark.parametrize(
"backend",
[
pytest.param(
lyrics.Genius,
marks=xfail_on_ci(
"GitHub actions is on some form of Cloudflare blacklist"
),
),
lyrics.Tekstowo,
lyrics.LRCLib,
# lyrics.MusiXmatch,
],
)
def test_backend_source(self, backend):
"""Test default backends with a song known to exist in respective
databases.
"""
plugin = lyrics.LyricsPlugin()
backend = backend(plugin.config, plugin._log)
title = "Lady Madonna"
res = backend.fetch("The Beatles", title)
assert PHRASE_BY_TITLE[title] in res.lower()
class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
"""Test scraping heuristics on a fake html page."""
source = dict(
url="http://www.example.com",
artist="John Doe",
title="Beets song",
path="/lyrics/beetssong",
)
def setUp(self):
"""Set up configuration"""
LyricsGoogleBaseTest.setUp(self)
self.plugin = lyrics.LyricsPlugin()
@patch.object(lyrics.Backend, "fetch_url", MockFetchUrl())
def test_mocked_source_ok(self):
def test_mocked_source_ok(self, backend):
"""Test that lyrics of the mocked page are correctly scraped"""
url = self.source["url"] + self.source["path"]
res = lyrics.scrape_lyrics_from_html(raw_backend.fetch_url(url))
assert google.is_lyrics(res), url
res = lyrics.scrape_lyrics_from_html(backend.fetch_url(url))
assert backend.is_lyrics(res), url
assert PHRASE_BY_TITLE[self.source["title"]] in res.lower()
@patch.object(lyrics.Backend, "fetch_url", MockFetchUrl())
def test_is_page_candidate_exact_match(self):
def test_is_page_candidate_exact_match(self, backend):
"""Test matching html page title with song infos -- when song infos are
present in the title.
"""
from bs4 import BeautifulSoup, SoupStrainer
s = self.source
url = str(s["url"] + s["path"])
html = raw_backend.fetch_url(url)
html = backend.fetch_url(url)
soup = BeautifulSoup(
html, "html.parser", parse_only=SoupStrainer("title")
)
assert google.is_page_candidate(
assert backend.is_page_candidate(
url, soup.title.string, s["title"], s["artist"]
), url
def test_is_page_candidate_fuzzy_match(self):
def test_is_page_candidate_fuzzy_match(self, backend):
"""Test matching html page title with song infos -- when song infos are
not present in the title.
"""
@ -356,16 +324,16 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
url_title = "example.com | Beats song by John doe"
# very small diffs (typo) are ok eg 'beats' vs 'beets' with same artist
assert google.is_page_candidate(
assert backend.is_page_candidate(
url, url_title, s["title"], s["artist"]
), url
# reject different title
url_title = "example.com | seets bong lyrics by John doe"
assert not google.is_page_candidate(
assert not backend.is_page_candidate(
url, url_title, s["title"], s["artist"]
), url
def test_is_page_candidate_special_chars(self):
def test_is_page_candidate_special_chars(self, backend):
"""Ensure that `is_page_candidate` doesn't crash when the artist
and such contain special regular expression characters.
"""
@ -374,30 +342,42 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
url = s["url"] + s["path"]
url_title = "foo"
google.is_page_candidate(url, url_title, s["title"], "Sunn O)))")
backend.is_page_candidate(url, url_title, s["title"], "Sunn O)))")
def test_is_lyrics(self, backend):
texts = ["LyricsMania.com - Copyright (c) 2013 - All Rights Reserved"]
texts += [
"""All material found on this site is property\n
of mywickedsongtext brand"""
]
for t in texts:
assert not backend.is_lyrics(t)
def test_slugify(self, backend):
text = "http://site.com/\xe7afe-au_lait(boisson)"
assert backend.slugify(text) == "http://site.com/cafe_au_lait"
def test_missing_lyrics(self, backend):
lyrics = """
Lyricsmania staff is working hard for you to add $TITLE lyrics as soon
as they'll be released by $ARTIST, check back soon!
In case you have the lyrics to $TITLE and want to send them to us, fill out
the following form.
"""
assert not backend.is_lyrics(lyrics)
# test Genius backend
class TestGeniusLyrics(LyricsBackendTest):
@pytest.fixture(scope="class")
def backend_name(self):
return "genius"
@xfail_on_ci("Genius returns 403 FORBIDDEN")
@pytest.mark.integration_test
def test_backend_source(self, backend):
super().test_backend_source(backend)
class GeniusBaseTest(unittest.TestCase):
def setUp(self):
"""Set up configuration."""
try:
__import__("bs4")
except ImportError:
self.skipTest("Beautiful Soup 4 not available")
class GeniusScrapeLyricsFromHtmlTest(GeniusBaseTest):
"""tests Genius._scrape_lyrics_from_html()"""
def setUp(self):
"""Set up configuration"""
GeniusBaseTest.setUp(self)
self.plugin = lyrics.LyricsPlugin()
def test_no_lyrics_div(self):
def test_no_lyrics_div(self, backend):
"""Ensure we don't crash when the scraping the html for a genius page
doesn't contain <div class="lyrics"></div>
"""
@ -405,38 +385,27 @@ class GeniusScrapeLyricsFromHtmlTest(GeniusBaseTest):
# expected return value None
url = "https://genius.com/sample"
mock = MockFetchUrl()
assert genius._scrape_lyrics_from_html(mock(url)) is None
assert backend._scrape_lyrics_from_html(mock(url)) is None
def test_good_lyrics(self):
def test_good_lyrics(self, backend):
"""Ensure we are able to scrape a page with lyrics"""
url = "https://genius.com/Ttng-chinchilla-lyrics"
mock = MockFetchUrl()
lyrics = genius._scrape_lyrics_from_html(mock(url))
lyrics = backend._scrape_lyrics_from_html(mock(url))
assert lyrics is not None
assert lyrics.count("\n") == 28
def test_good_lyrics_multiple_divs(self):
def test_good_lyrics_multiple_divs(self, backend):
"""Ensure we are able to scrape a page with lyrics"""
url = "https://genius.com/2pac-all-eyez-on-me-lyrics"
mock = MockFetchUrl()
lyrics = genius._scrape_lyrics_from_html(mock(url))
lyrics = backend._scrape_lyrics_from_html(mock(url))
assert lyrics is not None
assert lyrics.count("\n") == 133
# TODO: find an example of a lyrics page with multiple divs and test it
class GeniusFetchTest(GeniusBaseTest):
"""tests Genius.fetch()"""
def setUp(self):
"""Set up configuration"""
GeniusBaseTest.setUp(self)
self.plugin = lyrics.LyricsPlugin()
@patch.object(lyrics.Genius, "_scrape_lyrics_from_html")
@patch.object(lyrics.Backend, "fetch_url", return_value=True)
def test_json(self, mock_fetch_url, mock_scrape):
def test_json(self, mock_fetch_url, mock_scrape, backend):
"""Ensure we're finding artist matches"""
with patch.object(
lyrics.Genius,
@ -464,51 +433,35 @@ class GeniusFetchTest(GeniusBaseTest):
) as mock_json:
# genius uses zero-width-spaces (\u200B) for lowercase
# artists so we make sure we can match those
assert genius.fetch("blackbear", "Idfc") is not None
assert backend.fetch("blackbear", "Idfc") is not None
mock_fetch_url.assert_called_once_with("blackbear_url")
mock_scrape.assert_called_once_with(True)
# genius uses the hyphen minus (\u002D) as their dash
assert genius.fetch("El-p", "Idfc") is not None
assert backend.fetch("El-p", "Idfc") is not None
mock_fetch_url.assert_called_with("El-p_url")
mock_scrape.assert_called_with(True)
# test no matching artist
assert genius.fetch("doesntexist", "none") is None
assert backend.fetch("doesntexist", "none") is None
# test invalid json
mock_json.return_value = None
assert genius.fetch("blackbear", "Idfc") is None
assert backend.fetch("blackbear", "Idfc") is None
# test Tekstowo
class TestTekstowoLyrics(LyricsBackendTest):
@pytest.fixture(scope="class")
def backend_name(self):
return "tekstowo"
class TekstowoBaseTest(unittest.TestCase):
def setUp(self):
"""Set up configuration."""
try:
__import__("bs4")
except ImportError:
self.skipTest("Beautiful Soup 4 not available")
class TekstowoExtractLyricsTest(TekstowoBaseTest):
"""tests Tekstowo.extract_lyrics()"""
def setUp(self):
"""Set up configuration"""
TekstowoBaseTest.setUp(self)
self.plugin = lyrics.LyricsPlugin()
tekstowo.config = self.plugin.config
def test_good_lyrics(self):
def test_good_lyrics(self, backend):
"""Ensure we are able to scrape a page with lyrics"""
url = "https://www.tekstowo.pl/piosenka,24kgoldn,city_of_angels_1.html"
mock = MockFetchUrl()
assert tekstowo.extract_lyrics(mock(url))
assert backend.extract_lyrics(mock(url))
def test_no_lyrics(self):
def test_no_lyrics(self, backend):
"""Ensure we don't crash when the scraping the html for a Tekstowo page
doesn't contain lyrics
"""
@ -517,19 +470,29 @@ class TekstowoExtractLyricsTest(TekstowoBaseTest):
"beethoven_piano_sonata_17_tempest_the_3rd_movement.html"
)
mock = MockFetchUrl()
assert not tekstowo.extract_lyrics(mock(url))
assert not backend.extract_lyrics(mock(url))
# test LRCLib backend
class TestLRCLibLyrics(LyricsBackendTest):
@pytest.fixture(scope="class")
def backend_name(self):
return "lrclib"
@pytest.fixture
def mock_get(self):
with patch("beetsplug.lyrics.requests.get") as mock:
yield mock
class LRCLibLyricsTest(unittest.TestCase):
def setUp(self):
self.plugin = lyrics.LyricsPlugin()
lrclib.config = self.plugin.config
@patch("beetsplug.lyrics.requests.get")
def test_fetch_synced_lyrics(self, mock_get):
@pytest.mark.parametrize(
"plugin_config, expected_lyrics_type",
[
({"synced": True}, "syncedLyrics"),
({"synced": False}, "plainLyrics"),
],
)
def test_synced_config_option(
self, backend, mock_get, expected_lyrics_type
):
mock_response = {
"syncedLyrics": "[00:00.00] la la la",
"plainLyrics": "la la la",
@ -537,16 +500,13 @@ class LRCLibLyricsTest(unittest.TestCase):
mock_get.return_value.json.return_value = mock_response
mock_get.return_value.status_code = 200
self.plugin.config["synced"] = False
lyrics = lrclib.fetch("la", "la", "la", 999)
assert lyrics == mock_response["plainLyrics"]
lyrics = backend.fetch("la", "la", "la", 999)
assert lyrics == mock_response[expected_lyrics_type]
self.plugin.config["synced"] = True
lyrics = lrclib.fetch("la", "la", "la", 999)
assert lyrics == mock_response["syncedLyrics"]
@patch("beetsplug.lyrics.requests.get")
def test_fetch_synced_lyrics_fallback(self, mock_get):
@pytest.mark.parametrize(
"plugin_config", [({"synced": True}), ({"synced": False})]
)
def test_fetch_plain_lyrics(self, backend, mock_get):
mock_response = {
"syncedLyrics": "",
"plainLyrics": "la la la",
@ -554,26 +514,11 @@ class LRCLibLyricsTest(unittest.TestCase):
mock_get.return_value.json.return_value = mock_response
mock_get.return_value.status_code = 200
self.plugin.config["synced"] = True
lyrics = lrclib.fetch("la", "la", "la", 999)
assert lyrics == mock_response["plainLyrics"]
@patch("beetsplug.lyrics.requests.get")
def test_fetch_plain_lyrics(self, mock_get):
mock_response = {
"syncedLyrics": "",
"plainLyrics": "la la la",
}
mock_get.return_value.json.return_value = mock_response
mock_get.return_value.status_code = 200
self.plugin.config["synced"] = False
lyrics = lrclib.fetch("la", "la", "la", 999)
lyrics = backend.fetch("la", "la", "la", 999)
assert lyrics == mock_response["plainLyrics"]
@patch("beetsplug.lyrics.requests.get")
def test_fetch_not_found(self, mock_get):
def test_fetch_not_found(self, backend, mock_get):
mock_response = {
"statusCode": 404,
"error": "Not Found",
@ -582,15 +527,14 @@ class LRCLibLyricsTest(unittest.TestCase):
mock_get.return_value.json.return_value = mock_response
mock_get.return_value.status_code = 404
lyrics = lrclib.fetch("la", "la", "la", 999)
lyrics = backend.fetch("la", "la", "la", 999)
assert lyrics is None
@patch("beetsplug.lyrics.requests.get")
def test_fetch_exception(self, mock_get):
def test_fetch_exception(self, backend, mock_get):
mock_get.side_effect = requests.RequestException
lyrics = lrclib.fetch("la", "la", "la", 999)
lyrics = backend.fetch("la", "la", "la", 999)
assert lyrics is None