mirror of
https://github.com/beetbox/beets.git
synced 2025-12-15 21:14:19 +01:00
Tidy up 'Google.is_page_candidate' method and remove 'Google.sluggify' method which was a duplicate of 'slug'. Since 'GeniusFetchTest' only tested whether the artist name is cleaned up (the rest of the functionality is patched), remove it and move its test cases to the 'test_slug' test.
507 lines
16 KiB
Python
507 lines
16 KiB
Python
# This file is part of beets.
|
|
# Copyright 2016, Fabrice Laporte.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
|
|
"""Tests for the 'lyrics' plugin."""
|
|
|
|
import re
|
|
from functools import partial
|
|
from http import HTTPStatus
|
|
|
|
import pytest
|
|
|
|
from beets.library import Item
|
|
from beets.test.helper import PluginMixin
|
|
from beetsplug import lyrics
|
|
|
|
from .lyrics_pages import LyricsPage, lyrics_pages
|
|
|
|
# Lowercase phrases keyed by track title. A scraping test looks up the phrase
# for its title and asserts that it occurs in the lowercased scraped lyrics.
PHRASE_BY_TITLE = {
    "Lady Madonna": "friday night arrives without a suitcase",
    "Jazz'n'blues": "as i check my balance i kiss the screen",
    "Beets song": "via plugins, beets becomes a panacea",
}
|
|
|
|
|
|
class TestLyricsUtils:
    """Checks for the module-level helper functions of the lyrics plugin."""

    @pytest.mark.parametrize(
        "artist, title",
        [
            ("Artist", ""),
            ("", "Title"),
            (" ", ""),
            ("", " "),
            ("", ""),
        ],
    )
    def test_search_empty(self, artist, title):
        """No search pairs are yielded for a blank artist or title."""
        item = Item(artist=artist, title=title)

        assert not list(lyrics.search_pairs(item))

    @pytest.mark.parametrize(
        "artist, artist_sort, expected_extra_artists",
        [
            ("Alice ft. Bob", "", ["Alice"]),
            ("Alice feat Bob", "", ["Alice"]),
            ("Alice feat. Bob", "", ["Alice"]),
            ("Alice feats Bob", "", []),
            ("Alice featuring Bob", "", ["Alice"]),
            ("Alice & Bob", "", ["Alice"]),
            ("Alice and Bob", "", ["Alice"]),
            ("Alice", "", []),
            ("Alice", "Alice", []),
            ("Alice", "alice", []),
            ("Alice", "alice ", []),
            ("Alice", "Alice A", ["Alice A"]),
            ("CHVRCHΞS", "CHVRCHES", ["CHVRCHES"]),
            ("横山克", "Masaru Yokoyama", ["Masaru Yokoyama"]),
        ],
    )
    def test_search_pairs_artists(
        self, artist, artist_sort, expected_extra_artists
    ):
        """Artist variants follow the original artist in the search pairs."""
        item = Item(artist=artist, artist_sort=artist_sort, title="song")
        # The unmodified artist name must remain the very first entry
        expected_artists = [artist, *expected_extra_artists]

        found_artists = [name for name, _ in lyrics.search_pairs(item)]

        assert found_artists == expected_artists

    @pytest.mark.parametrize(
        "title, expected_extra_titles",
        [
            ("1/2", ["1", "2"]),
            ("1 / 2", ["1", "2"]),
            ("Song (live)", ["Song"]),
            ("Song (live) (new)", ["Song"]),
            ("Song (live (new))", ["Song"]),
            ("Song ft. B", ["Song"]),
            ("Song featuring B", ["Song"]),
            ("Song and B", []),
            ("Song: B", ["Song"]),
        ],
    )
    def test_search_pairs_titles(self, title, expected_extra_titles):
        """Title variants follow the original title, without duplicates."""
        item = Item(title=title, artist="A")

        # dict keys de-duplicate the titles while keeping first-seen order
        unique_titles = dict.fromkeys(
            candidate
            for _, candidates in lyrics.search_pairs(item)
            for candidate in candidates
        )

        assert list(unique_titles) == [title, *expected_extra_titles]

    @pytest.mark.parametrize(
        "initial_lyrics, expected",
        [
            ("Verse\nLyrics credit in the last line", "Verse"),
            ("Lyrics credit in the first line\nVerse", "Verse"),
            (
                """Verse
Lyrics mentioned somewhere in the middle
Verse""",
                """Verse
Lyrics mentioned somewhere in the middle
Verse""",
            ),
        ],
    )
    def test_remove_credits(self, initial_lyrics, expected):
        """Credit lines go from the first and last line, not the middle."""
        cleaned = lyrics.remove_credits(initial_lyrics)

        assert cleaned == expected

    @pytest.mark.parametrize(
        "initial_text, expected",
        [
            (
                """<!--lyrics below-->
one
<br class='myclass'>
two !
<br><br \\>
<blink>four</blink>""",
                "<!--lyrics below-->\none\ntwo !\n\n<blink>four</blink>",
            ),
            ("foo<script>bar</script>baz", "foobaz"),
        ],
    )
    def test_scrape_strip_cruft(self, initial_text, expected):
        """Line-break tags become newlines and script elements are removed."""
        stripped = lyrics._scrape_strip_cruft(initial_text)

        assert stripped == expected

    def test_scrape_merge_paragraphs(self):
        """Adjacent paragraph tags are collapsed into single newlines."""
        html = "one</p> <p class='myclass'>two</p><p>three"

        merged = lyrics._scrape_merge_paragraphs(html)

        assert merged == "one\ntwo\nthree"

    @pytest.mark.parametrize(
        "text, expected",
        [
            ("test", "test"),
            ("Mørdag", "mordag"),
            ("l'été c'est fait pour jouer", "l-ete-c-est-fait-pour-jouer"),
            ("\xe7afe au lait (boisson)", "cafe-au-lait-boisson"),
            ("Multiple  spaces -- and symbols! -- merged", "multiple-spaces-and-symbols-merged"),  # noqa: E501
            ("\u200bno-width-space", "no-width-space"),
            ("El\u002dp", "el-p"),
            ("\u200bblackbear", "blackbear"),
            ("\u200d", ""),
            ("\u2010", ""),
        ],
    )  # fmt: skip
    def test_slug(self, text, expected):
        """Slugs are lowercase ASCII words joined by single hyphens."""
        slugged = lyrics.slug(text)

        assert slugged == expected
|
|
|
|
|
|
class TestSearchBackend:
    """Artist matching of ``SearchBackend`` for various ``dist_thresh``."""

    @pytest.fixture
    def backend(self, dist_thresh):
        """Return a ``SearchBackend`` using the parametrized ``dist_thresh``."""
        lyrics_plugin = lyrics.LyricsPlugin()
        lyrics_plugin.config.set({"dist_thresh": dist_thresh})

        return lyrics.SearchBackend(lyrics_plugin.config, lyrics_plugin._log)

    @pytest.mark.parametrize(
        "dist_thresh, target_artist, artist, should_match",
        [
            (0.11, "Target Artist", "Target Artist", True),
            (0.11, "Target Artist", "Target Artis", True),
            (0.11, "Target Artist", "Target Arti", False),
            (0.11, "Psychonaut", "Psychonaut (BEL)", True),
            (0.11, "beets song", "beats song", True),
            (0.10, "beets song", "beats song", False),
            (
                0.11,
                "Lucid Dreams (Forget Me)",
                "Lucid Dreams (Remix) ft. Lil Uzi Vert",
                False,
            ),
            (
                0.12,
                "Lucid Dreams (Forget Me)",
                "Lucid Dreams (Remix) ft. Lil Uzi Vert",
                True,
            ),
        ],
    )
    def test_check_match(self, backend, target_artist, artist, should_match):
        """Compare artists only; both titles are passed as empty strings."""
        matched = backend.check_match(target_artist, "", artist, "")

        assert matched == should_match
|
|
|
|
|
|
@pytest.fixture(scope="module")
def lyrics_root_dir(pytestconfig: pytest.Config):
    """Return the ``test/rsrc/lyrics`` directory under the pytest root path."""
    return pytestconfig.rootpath.joinpath("test", "rsrc", "lyrics")
|
|
|
|
|
|
class LyricsPluginMixin(PluginMixin):
    """Shared fixtures for tests which need a configured lyrics plugin."""

    plugin = "lyrics"

    @pytest.fixture
    def plugin_config(self):
        """Return lyrics configuration to test."""
        return {}

    @pytest.fixture
    def lyrics_plugin(self, backend_name, plugin_config):
        """Set configuration and return the plugin's instance.

        The configured ``sources`` list contains just ``backend_name``, a
        fixture which is not defined here and must be provided by the test
        class using this mixin.
        """
        # Build a new dict instead of adding "sources" to ``plugin_config``:
        # that object may be shared between tests (class-scoped fixture
        # overrides, parametrize values), so it must not be mutated here.
        config = {**plugin_config, "sources": [backend_name]}
        self.config[self.plugin].set(config)

        return lyrics.LyricsPlugin()
|
|
|
|
|
|
class TestLyricsPlugin(LyricsPluginMixin):
    """Plugin-level tests which run against the ``lrclib`` backend."""

    @pytest.fixture
    def backend_name(self):
        """Return the name of the lyrics backend to test."""
        return "lrclib"

    @pytest.mark.parametrize(
        "request_kwargs, expected_log_match",
        [
            (
                {"status_code": HTTPStatus.BAD_GATEWAY},
                r"LRCLib: Request error: 502",
            ),
            ({"text": "invalid"}, r"LRCLib: Could not decode.*JSON"),
        ],
    )
    def test_error_handling(
        self,
        requests_mock,
        lyrics_plugin,
        caplog,
        request_kwargs,
        expected_log_match,
    ):
        """Errors are logged with the backend name."""
        # Make the mocked search endpoint fail in the parametrized way
        requests_mock.get(lyrics.LRCLib.SEARCH_URL, **request_kwargs)

        assert lyrics_plugin.get_lyrics("", "", "", 0.0) is None
        assert caplog.messages
        last_log = caplog.messages[-1]
        assert last_log
        # The expected pattern is matched case-insensitively
        assert re.search(expected_log_match, last_log, re.I)
|
|
|
|
|
|
class LyricsBackendTest(LyricsPluginMixin):
    """Base class for tests targeting a single lyrics backend."""

    @pytest.fixture
    def backend(self, lyrics_plugin):
        """Return a lyrics backend instance."""
        first_backend, *_ = lyrics_plugin.backends
        return first_backend

    @pytest.fixture
    def lyrics_html(self, lyrics_root_dir, file_name):
        """Return the UTF-8 text of ``<file_name>.txt`` in the lyrics dir."""
        page_path = lyrics_root_dir / f"{file_name}.txt"
        return page_path.read_text(encoding="utf-8")
|
|
|
|
|
|
@pytest.mark.on_lyrics_update
class TestLyricsSources(LyricsBackendTest):
    """Fetch lyrics for every page listed in ``lyrics_pages``."""

    @pytest.fixture(scope="class")
    def plugin_config(self):
        """Return configuration with a Google API key and synced lyrics on."""
        return {"google_API_key": "test", "synced": True}

    @pytest.fixture(
        params=[pytest.param(lp, marks=lp.marks) for lp in lyrics_pages],
        ids=str,
    )
    def lyrics_page(self, request):
        """Return the lyrics page the current test is parametrized with."""
        return request.param

    @pytest.fixture
    def backend_name(self, lyrics_page):
        """Return the backend name declared by the lyrics page."""
        return lyrics_page.backend

    @pytest.fixture(autouse=True)
    def _patch_google_search(self, requests_mock, lyrics_page):
        """Mock the Google Search API to return the lyrics page under test."""
        requests_mock.real_http = True

        search_result = {
            "title": lyrics_page.url_title,
            "link": lyrics_page.url,
            "displayLink": lyrics_page.root_url,
        }
        requests_mock.get(
            lyrics.Google.SEARCH_URL, json={"items": [search_result]}
        )

    def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage):
        """Test parsed lyrics from each of the configured lyrics pages."""
        fetched = lyrics_plugin.get_lyrics(
            lyrics_page.artist, lyrics_page.track_title, "", 186
        )

        assert fetched
        assert fetched == lyrics_page.lyrics
|
|
|
|
|
|
class TestGoogleLyrics(LyricsBackendTest):
    """Test scraping heuristics on a fake html page."""

    # Title of the song on the mocked page; also a key of PHRASE_BY_TITLE
    TITLE = "Beets song"

    @pytest.fixture(scope="class")
    def backend_name(self):
        """Return the name of the backend under test."""
        return "google"

    @pytest.fixture(scope="class")
    def plugin_config(self):
        """Return configuration containing a dummy Google API key."""
        return {"google_API_key": "test"}

    @pytest.fixture(scope="class")
    def file_name(self):
        """Return the resource file name (without extension) of the page."""
        return "examplecom/beetssong"

    def test_mocked_source_ok(self, backend, lyrics_html):
        """Test that lyrics of the mocked page are correctly scraped"""
        # Lowercased because the phrases in PHRASE_BY_TITLE are lowercase
        result = backend.scrape_lyrics(lyrics_html).lower()

        assert result
        assert backend.is_lyrics(result)
        assert PHRASE_BY_TITLE[self.TITLE] in result

    @pytest.mark.parametrize(
        "url_title, artist, should_be_candidate",
        [
            ("John Doe - beets song Lyrics", "John Doe", True),
            ("example.com | Beats song by John doe", "John Doe", True),
            ("example.com | seets bong lyrics by John doe", "John Doe", False),
            ("foo", "Sun O)))", False),
        ],
    )
    def test_is_page_candidate(
        self, backend, url_title, artist, should_be_candidate
    ):
        """Test which search result titles are accepted as page candidates.

        Only the page title and artist vary; the song title and the URL are
        fixed. The page contents are not needed, so the ``lyrics_html``
        fixture is deliberately not requested here.
        """
        result = backend.is_page_candidate(
            artist,
            self.TITLE,
            "http://www.example.com/lyrics/beetssong",
            url_title,
        )
        assert bool(result) == should_be_candidate

    @pytest.mark.parametrize(
        "text",
        [
            "LyricsMania.com - Copyright (c) 2013 - All Rights Reserved",
            """All material found on this site is property\n
of mywickedsongtext brand""",
            """
Lyricsmania staff is working hard for you to add $TITLE lyrics as soon
as they'll be released by $ARTIST, check back soon!
In case you have the lyrics to $TITLE and want to send them to us, fill out
the following form.
""",
        ],
    )
    def test_bad_lyrics(self, backend, text):
        """Test that site boilerplate is not recognised as lyrics."""
        # The parameter is named ``text`` so that it does not shadow the
        # imported ``lyrics`` module.
        assert not backend.is_lyrics(text)
|
|
|
|
|
|
class TestGeniusLyrics(LyricsBackendTest):
    """Scraping of saved Genius pages."""

    @pytest.fixture(scope="class")
    def backend_name(self):
        """Return the name of the backend under test."""
        return "genius"

    @pytest.mark.parametrize(
        "file_name, expected_line_count",
        [
            ("geniuscom/2pacalleyezonmelyrics", 134),
            ("geniuscom/Ttngchinchillalyrics", 29),
            ("geniuscom/sample", 0),  # see https://github.com/beetbox/beets/issues/3535
        ],
    )  # fmt: skip
    def test_scrape(self, backend, lyrics_html, expected_line_count):
        """The scraped lyrics have the expected number of lines."""
        scraped = backend.scrape_lyrics(lyrics_html)
        # A falsy scrape result counts as zero lines
        scraped_lines = scraped.splitlines() if scraped else []

        assert len(scraped_lines) == expected_line_count
|
|
|
|
|
|
class TestTekstowoLyrics(LyricsBackendTest):
    """Scraping of saved Tekstowo pages."""

    @pytest.fixture(scope="class")
    def backend_name(self):
        """Return the name of the backend under test."""
        return "tekstowo"

    @pytest.mark.parametrize(
        "file_name, expecting_lyrics",
        [
            ("tekstowopl/piosenka24kgoldncityofangels1", True),
            (
                "tekstowopl/piosenkabeethovenbeethovenpianosonata17tempestthe3rdmovement",  # noqa: E501
                False,
            ),
        ],
    )
    def test_scrape(self, backend, lyrics_html, expecting_lyrics):
        """Scraping yields a truthy result only when lyrics are expected."""
        found_lyrics = bool(backend.scrape_lyrics(lyrics_html))

        assert found_lyrics == expecting_lyrics
|
|
|
|
|
|
# Default ``duration`` of the response entries built by ``lyrics_match``
LYRICS_DURATION = 950


def lyrics_match(**overrides):
    """Return a default lyrics response entry updated with ``overrides``.

    The defaults describe a non-instrumental track lasting
    ``LYRICS_DURATION`` which has both synced and plain lyrics. Any keyword
    argument replaces the default with the same key or adds a new key.
    """
    match = {
        "instrumental": False,
        "duration": LYRICS_DURATION,
        "syncedLyrics": "synced",
        "plainLyrics": "plain",
    }
    match.update(overrides)
    return match
|
|
|
|
|
|
class TestLRCLibLyrics(LyricsBackendTest):
    """Selection of lyrics from mocked LRCLib search responses."""

    # Duration passed to ``backend.fetch`` for the item being looked up
    ITEM_DURATION = 999

    @pytest.fixture(scope="class")
    def backend_name(self):
        """Return the name of the backend under test."""
        return "lrclib"

    @pytest.fixture
    def fetch_lyrics(self, backend, requests_mock, response_data):
        """Return a callable which fetches lyrics from the mocked endpoints.

        The GET endpoint answers with 404 while the search endpoint returns
        ``response_data`` as JSON.
        """
        requests_mock.get(backend.GET_URL, status_code=HTTPStatus.NOT_FOUND)
        requests_mock.get(backend.SEARCH_URL, json=response_data)

        fetch_args = ("la", "la", "la", self.ITEM_DURATION)
        return partial(backend.fetch, *fetch_args)

    @pytest.mark.parametrize("response_data", [[lyrics_match()]])
    @pytest.mark.parametrize(
        "plugin_config, expected_lyrics",
        [({"synced": True}, "synced"), ({"synced": False}, "plain")],
    )
    def test_synced_config_option(self, fetch_lyrics, expected_lyrics):
        """The ``synced`` option picks between synced and plain lyrics."""
        fetched = fetch_lyrics()

        assert fetched == expected_lyrics

    @pytest.mark.parametrize(
        "response_data, expected_lyrics",
        [
            pytest.param([], None, id="handle non-matching lyrics"),
            pytest.param(
                [lyrics_match()],
                "synced",
                id="synced when available",
            ),
            pytest.param(
                [lyrics_match(duration=1)],
                None,
                id="none: duration too short",
            ),
            pytest.param(
                [lyrics_match(instrumental=True)],
                "[Instrumental]",
                id="instrumental track",
            ),
            pytest.param(
                [lyrics_match(syncedLyrics=None)],
                "plain",
                id="plain by default",
            ),
            pytest.param(
                [
                    lyrics_match(
                        duration=ITEM_DURATION,
                        syncedLyrics=None,
                        plainLyrics="plain with closer duration",
                    ),
                    lyrics_match(syncedLyrics="synced", plainLyrics="plain 2"),
                ],
                "synced",
                id="prefer synced lyrics even if plain duration is closer",
            ),
            pytest.param(
                [
                    lyrics_match(
                        duration=ITEM_DURATION,
                        syncedLyrics=None,
                        plainLyrics="valid plain",
                    ),
                    lyrics_match(
                        duration=1,
                        syncedLyrics="synced with invalid duration",
                    ),
                ],
                "valid plain",
                id="ignore synced with invalid duration",
            ),
            pytest.param(
                [lyrics_match(syncedLyrics=None), lyrics_match()],
                "synced",
                id="prefer match with synced lyrics",
            ),
        ],
    )
    @pytest.mark.parametrize("plugin_config", [{"synced": True}])
    def test_fetch_lyrics(self, fetch_lyrics, expected_lyrics):
        """The expected lyrics are chosen from the search response."""
        fetched = fetch_lyrics()

        assert fetched == expected_lyrics
|