mirror of
https://github.com/beetbox/beets.git
synced 2025-12-06 16:42:42 +01:00
Google: test the entire fetch method
This commit is contained in:
parent
334bbde826
commit
c250bfa724
2 changed files with 83 additions and 32 deletions
|
|
@@ -231,7 +231,7 @@ class Backend:
|
|||
self._log = log
|
||||
self.config = config
|
||||
|
||||
def fetch_url(self, url):
|
||||
def fetch_url(self, url, **kwargs):
|
||||
"""Retrieve the content at a given URL, or return None if the source
|
||||
is unreachable.
|
||||
"""
|
||||
|
|
@@ -249,6 +249,7 @@ class Backend:
|
|||
"User-Agent": USER_AGENT,
|
||||
},
|
||||
timeout=10,
|
||||
**kwargs,
|
||||
)
|
||||
except requests.RequestException as exc:
|
||||
self._log.debug("lyrics request failed: {0}", exc)
|
||||
|
|
@@ -604,11 +605,7 @@ class Google(Backend):
|
|||
"""Fetch lyrics from Google search results."""
|
||||
|
||||
REQUIRES_BS = True
|
||||
|
||||
def __init__(self, config, log):
|
||||
super().__init__(config, log)
|
||||
self.api_key = config["google_API_key"].as_str()
|
||||
self.engine_id = config["google_engine_ID"].as_str()
|
||||
SEARCH_URL = "https://www.googleapis.com/customsearch/v1"
|
||||
|
||||
def is_lyrics(self, text, artist=None):
|
||||
"""Determine whether the text seems to be valid lyrics."""
|
||||
|
|
@@ -686,14 +683,13 @@ class Google(Backend):
|
|||
return ratio >= typo_ratio
|
||||
|
||||
def fetch(self, artist: str, title: str, *_) -> str | None:
|
||||
query = f"{artist} {title}"
|
||||
url = "https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s" % (
|
||||
self.api_key,
|
||||
self.engine_id,
|
||||
quote(query.encode("utf-8")),
|
||||
)
|
||||
params = {
|
||||
"key": self.config["google_API_key"].as_str(),
|
||||
"cx": self.config["google_engine_ID"].as_str(),
|
||||
"q": f"{artist} {title}",
|
||||
}
|
||||
|
||||
data = self.fetch_url(url)
|
||||
data = self.fetch_url(self.SEARCH_URL, params=params)
|
||||
if not data:
|
||||
self._log.debug("google backend returned no data")
|
||||
return None
|
||||
|
|
|
|||
|
|
@@ -16,6 +16,7 @@
|
|||
|
||||
import os
|
||||
from functools import partial
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@@ -224,45 +225,99 @@ class TestGoogleLyrics(LyricsBackendTest):
|
|||
def file_name(self):
|
||||
return "examplecom/beetssong"
|
||||
|
||||
@pytest.fixture
|
||||
def response_data(self, url_title, url):
|
||||
return {
|
||||
"items": [
|
||||
{
|
||||
"title": url_title,
|
||||
"link": url,
|
||||
"displayLink": urlparse(url).netloc,
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@pytest.fixture
|
||||
def fetch_lyrics(
|
||||
self, backend, requests_mock, response_data, artist, title
|
||||
):
|
||||
requests_mock.get(backend.SEARCH_URL, json=response_data)
|
||||
requests_mock.real_http = True
|
||||
|
||||
return partial(backend.fetch, artist, title)
|
||||
|
||||
@pytest.mark.on_lyrics_update
|
||||
@pytest.mark.parametrize(
|
||||
"title, url",
|
||||
"artist, title, url_title, url",
|
||||
[
|
||||
*(
|
||||
("Lady Madonna", url)
|
||||
for url in (
|
||||
"http://www.chartlyrics.com/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx", # noqa: E501
|
||||
"http://www.absolutelyrics.com/lyrics/view/the_beatles/lady_madonna", # noqa: E501
|
||||
("The Beatles", "Lady Madonna", url_title, url)
|
||||
for url_title, url in (
|
||||
(
|
||||
"The Beatles Lady Madonna lyrics",
|
||||
"http://www.chartlyrics.com/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx",
|
||||
),
|
||||
(
|
||||
"Lady Madonna Lyrics :: The Beatles - Absolute Lyrics",
|
||||
"http://www.absolutelyrics.com/lyrics/view/the_beatles/lady_madonna",
|
||||
),
|
||||
(
|
||||
"Lady Madonna - The Beatles - LETRAS.MUS.BR",
|
||||
"https://www.letras.mus.br/the-beatles/275/",
|
||||
),
|
||||
(
|
||||
"The Beatles - Lady Madonna Lyrics",
|
||||
"https://www.lyricsmania.com/lady_madonna_lyrics_the_beatles.html",
|
||||
),
|
||||
(
|
||||
"Lady Madonna lyrics by The Beatles - original song full text. Official Lady Madonna lyrics, 2024 version | LyricsMode.com", # noqa: E501
|
||||
"https://www.lyricsmode.com/lyrics/b/beatles/lady_madonna.html",
|
||||
),
|
||||
(
|
||||
"Paroles Lady Madonna par The Beatles - Lyrics - Paroles.net",
|
||||
"https://www.paroles.net/the-beatles/paroles-lady-madonna",
|
||||
),
|
||||
(
|
||||
"THE BEATLES - LADY MADONNA LYRICS",
|
||||
"https://www.songlyrics.com/the-beatles/lady-madonna-lyrics/",
|
||||
),
|
||||
(
|
||||
"The Beatles - Lady Madonna",
|
||||
"https://sweetslyrics.com/the-beatles/lady-madonna-lyrics",
|
||||
),
|
||||
(
|
||||
"Lady Madonna - Letra - The Beatles - Musica.com",
|
||||
"https://www.musica.com/letras.asp?letra=59862",
|
||||
),
|
||||
(
|
||||
"Paroles et traduction The Beatles : Lady Madonna - paroles de chanson", # noqa: E501
|
||||
"https://www.lacoccinelle.net/259956-the-beatles-lady-madonna.html",
|
||||
),
|
||||
)
|
||||
),
|
||||
pytest.param(
|
||||
"The Beatles",
|
||||
"Lady Madonna",
|
||||
"The Beatles - Lady Madonna Lyrics | AZLyrics.com",
|
||||
"https://www.azlyrics.com/lyrics/beatles/ladymadonna.html",
|
||||
marks=xfail_on_ci("AZLyrics is blocked by Cloudflare"),
|
||||
),
|
||||
(
|
||||
"Amy Winehouse",
|
||||
"Jazz'n'blues",
|
||||
"https://www.lyricsontop.com/amy-winehouse-songs/jazz-n-blues-lyrics.html", # noqa: E501
|
||||
"Amy Winehouse - Jazz N' Blues lyrics complete",
|
||||
"https://www.lyricsontop.com/amy-winehouse-songs/jazz-n-blues-lyrics.html",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_backend_source(self, backend, title, url):
|
||||
def test_backend_source(self, fetch_lyrics, title):
|
||||
"""Test if lyrics present on websites registered in beets google custom
|
||||
search engine are correctly scraped.
|
||||
"""
|
||||
response = backend.fetch_url(url)
|
||||
result = lyrics.scrape_lyrics_from_html(response).lower()
|
||||
lyrics = fetch_lyrics()
|
||||
|
||||
assert backend.is_lyrics(result)
|
||||
assert PHRASE_BY_TITLE[title] in result
|
||||
assert lyrics
|
||||
assert PHRASE_BY_TITLE[title].lower() in lyrics.lower()
|
||||
|
||||
def test_mocked_source_ok(self, backend, lyrics_html):
|
||||
"""Test that lyrics of the mocked page are correctly scraped"""
|
||||
|
|
|
|||
Loading…
Reference in a new issue