Google: test the entire fetch method

This commit is contained in:
Šarūnas Nejus 2024-10-18 01:03:01 +01:00
parent 334bbde826
commit c250bfa724
No known key found for this signature in database
GPG key ID: DD28F6704DBE3435
2 changed files with 83 additions and 32 deletions

View file

@ -231,7 +231,7 @@ class Backend:
self._log = log self._log = log
self.config = config self.config = config
def fetch_url(self, url): def fetch_url(self, url, **kwargs):
"""Retrieve the content at a given URL, or return None if the source """Retrieve the content at a given URL, or return None if the source
is unreachable. is unreachable.
""" """
@ -249,6 +249,7 @@ class Backend:
"User-Agent": USER_AGENT, "User-Agent": USER_AGENT,
}, },
timeout=10, timeout=10,
**kwargs,
) )
except requests.RequestException as exc: except requests.RequestException as exc:
self._log.debug("lyrics request failed: {0}", exc) self._log.debug("lyrics request failed: {0}", exc)
@ -604,11 +605,7 @@ class Google(Backend):
"""Fetch lyrics from Google search results.""" """Fetch lyrics from Google search results."""
REQUIRES_BS = True REQUIRES_BS = True
SEARCH_URL = "https://www.googleapis.com/customsearch/v1"
def __init__(self, config, log):
super().__init__(config, log)
self.api_key = config["google_API_key"].as_str()
self.engine_id = config["google_engine_ID"].as_str()
def is_lyrics(self, text, artist=None): def is_lyrics(self, text, artist=None):
"""Determine whether the text seems to be valid lyrics.""" """Determine whether the text seems to be valid lyrics."""
@ -686,14 +683,13 @@ class Google(Backend):
return ratio >= typo_ratio return ratio >= typo_ratio
def fetch(self, artist: str, title: str, *_) -> str | None: def fetch(self, artist: str, title: str, *_) -> str | None:
query = f"{artist} {title}" params = {
url = "https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s" % ( "key": self.config["google_API_key"].as_str(),
self.api_key, "cx": self.config["google_engine_ID"].as_str(),
self.engine_id, "q": f"{artist} {title}",
quote(query.encode("utf-8")), }
)
data = self.fetch_url(url) data = self.fetch_url(self.SEARCH_URL, params=params)
if not data: if not data:
self._log.debug("google backend returned no data") self._log.debug("google backend returned no data")
return None return None

View file

@ -16,6 +16,7 @@
import os import os
from functools import partial from functools import partial
from urllib.parse import urlparse
import pytest import pytest
@ -224,45 +225,99 @@ class TestGoogleLyrics(LyricsBackendTest):
def file_name(self): def file_name(self):
return "examplecom/beetssong" return "examplecom/beetssong"
@pytest.fixture
def response_data(self, url_title, url):
    """Return a fake search payload that contains exactly one result.

    The single item uses ``url_title`` as its title, ``url`` as its link
    and the network location parsed from ``url`` as its display link.
    """
    search_result = {
        "title": url_title,
        "link": url,
        "displayLink": urlparse(url).netloc,
    }
    return {"items": [search_result]}
@pytest.fixture
def fetch_lyrics(
    self, backend, requests_mock, response_data, artist, title
):
    """Return a callable that runs ``backend.fetch`` for one song.

    ``artist`` and ``title`` are already bound to the returned partial.
    Requests to ``backend.SEARCH_URL`` are answered by the mock with
    ``response_data`` serialised as JSON.
    """
    fetch = partial(backend.fetch, artist, title)

    requests_mock.get(backend.SEARCH_URL, json=response_data)
    # NOTE(review): per the requests-mock docs, ``real_http`` lets requests
    # that match no registered mock go out to the network, so the linked
    # lyrics page would be fetched for real - confirm this is intended.
    requests_mock.real_http = True

    return fetch
@pytest.mark.on_lyrics_update @pytest.mark.on_lyrics_update
@pytest.mark.parametrize( @pytest.mark.parametrize(
"title, url", "artist, title, url_title, url",
[ [
*( *(
("Lady Madonna", url) ("The Beatles", "Lady Madonna", url_title, url)
for url in ( for url_title, url in (
"http://www.chartlyrics.com/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx", # noqa: E501 (
"http://www.absolutelyrics.com/lyrics/view/the_beatles/lady_madonna", # noqa: E501 "The Beatles Lady Madonna lyrics",
"http://www.chartlyrics.com/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx",
),
(
"Lady Madonna Lyrics :: The Beatles - Absolute Lyrics",
"http://www.absolutelyrics.com/lyrics/view/the_beatles/lady_madonna",
),
(
"Lady Madonna - The Beatles - LETRAS.MUS.BR",
"https://www.letras.mus.br/the-beatles/275/", "https://www.letras.mus.br/the-beatles/275/",
),
(
"The Beatles - Lady Madonna Lyrics",
"https://www.lyricsmania.com/lady_madonna_lyrics_the_beatles.html", "https://www.lyricsmania.com/lady_madonna_lyrics_the_beatles.html",
),
(
"Lady Madonna lyrics by The Beatles - original song full text. Official Lady Madonna lyrics, 2024 version | LyricsMode.com", # noqa: E501
"https://www.lyricsmode.com/lyrics/b/beatles/lady_madonna.html", "https://www.lyricsmode.com/lyrics/b/beatles/lady_madonna.html",
),
(
"Paroles Lady Madonna par The Beatles - Lyrics - Paroles.net",
"https://www.paroles.net/the-beatles/paroles-lady-madonna", "https://www.paroles.net/the-beatles/paroles-lady-madonna",
),
(
"THE BEATLES - LADY MADONNA LYRICS",
"https://www.songlyrics.com/the-beatles/lady-madonna-lyrics/", "https://www.songlyrics.com/the-beatles/lady-madonna-lyrics/",
),
(
"The Beatles - Lady Madonna",
"https://sweetslyrics.com/the-beatles/lady-madonna-lyrics", "https://sweetslyrics.com/the-beatles/lady-madonna-lyrics",
),
(
"Lady Madonna - Letra - The Beatles - Musica.com",
"https://www.musica.com/letras.asp?letra=59862", "https://www.musica.com/letras.asp?letra=59862",
),
(
"Paroles et traduction The Beatles : Lady Madonna - paroles de chanson", # noqa: E501
"https://www.lacoccinelle.net/259956-the-beatles-lady-madonna.html", "https://www.lacoccinelle.net/259956-the-beatles-lady-madonna.html",
),
) )
), ),
pytest.param( pytest.param(
"The Beatles",
"Lady Madonna", "Lady Madonna",
"The Beatles - Lady Madonna Lyrics | AZLyrics.com",
"https://www.azlyrics.com/lyrics/beatles/ladymadonna.html", "https://www.azlyrics.com/lyrics/beatles/ladymadonna.html",
marks=xfail_on_ci("AZLyrics is blocked by Cloudflare"), marks=xfail_on_ci("AZLyrics is blocked by Cloudflare"),
), ),
( (
"Amy Winehouse",
"Jazz'n'blues", "Jazz'n'blues",
"https://www.lyricsontop.com/amy-winehouse-songs/jazz-n-blues-lyrics.html", # noqa: E501 "Amy Winehouse - Jazz N' Blues lyrics complete",
"https://www.lyricsontop.com/amy-winehouse-songs/jazz-n-blues-lyrics.html",
), ),
], ],
) )
def test_backend_source(self, backend, title, url): def test_backend_source(self, fetch_lyrics, title):
"""Test if lyrics present on websites registered in beets google custom """Test if lyrics present on websites registered in beets google custom
search engine are correctly scraped. search engine are correctly scraped.
""" """
response = backend.fetch_url(url) lyrics = fetch_lyrics()
result = lyrics.scrape_lyrics_from_html(response).lower()
assert backend.is_lyrics(result) assert lyrics
assert PHRASE_BY_TITLE[title] in result assert PHRASE_BY_TITLE[title].lower() in lyrics.lower()
def test_mocked_source_ok(self, backend, lyrics_html): def test_mocked_source_ok(self, backend, lyrics_html):
"""Test that lyrics of the mocked page are correctly scraped""" """Test that lyrics of the mocked page are correctly scraped"""