mirror of
https://github.com/beetbox/beets.git
synced 2025-12-15 04:55:10 +01:00
Lyrics: Refactor Genius, Google backends, and consolidate common functionality (#5474)
### Bug Fixes - Fixed #4791: Resolved an issue with the Genius backend where it couldn't match lyrics if there was a slight variation in the artist's name. ### Plugin Enhancements * **Session Management**: Introduced a `TimeoutSession` to enable connection pooling and maintain consistent configuration across requests. * **Error Handling**: Centralized error handling logic in a new `RequestsHandler` class, which includes methods for retrieving either HTML text or JSON data. * **Logging**: Added methods to ensure the backend name is included in log messages. ### Configuration Changes * Added a new `dist_thresh` field to the configuration, allowing users to control the maximum tolerable mismatch between the artist and title of the lyrics search result and their item. Interestingly, this field was previously available (though undocumented) and used in the `Tekstowo` backend. Now, this threshold has also been applied to **Genius** and **Google** search logic. ### Backend Updates * All backends that perform searches now validate each result against the configured `dist_thresh`. #### Genius * Removed the need to scrape HTML tags for lyrics; instead, lyrics are now parsed from the JSON data embedded in the HTML. This change should reduce our vulnerability to Genius' frequent alterations in their HTML structure. * Documented the structure of their search JSON data. #### Google * Typed the response data returned by the Google Custom Search API. * Excluded certain pages under **https://letras.mus.br** that do not contain lyrics. * Excluded all results from MusiXmatch, as we cannot access their pages. * Improved parsing of URL titles (used for matching item/lyrics artist/title): - Handled results from long search queries where URL titles are truncated with an ellipsis. - Enhanced URL title cleanup logic. - Added functionality to determine (or rather, guess) not only the track title but also the artist from the URL title. 
* Similar to #5406, search results are now compared to the original item and sorted by distance. Results exceeding the configured `dist_thresh` value are discarded. The previous functionality simply selected the first result containing the track's title in its URL, which often led to returning lyrics for the wrong artist, particularly for short track titles. * Since we now fetch lyrics confidently, redundant checks for valid lyrics and credits cleanup have been removed. ### HTML Cleanup * Organized regex patterns into a new `Html` class. * Adjusted patterns to ensure new lines between blocks of lyrics text scraped from `letras.mus.br` and `musica.com`. * Modified patterns to scrape missing lyrics text on `paroles.net` and `lacoccinelle.net`. See the diff in `test/plugins/lyrics_page.py`.
This commit is contained in:
commit
a1c0ebdeef
7 changed files with 894 additions and 799 deletions
115
beetsplug/_typing.py
Normal file
115
beetsplug/_typing.py
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from typing_extensions import NotRequired, TypedDict
|
||||
|
||||
JSONDict = dict[str, Any]
|
||||
|
||||
|
||||
class LRCLibAPI:
    """LRCLib API data types."""

    class Item(TypedDict):
        """Lyrics data item returned by the LRCLib API."""

        id: int
        name: str
        trackName: str
        artistName: str
        albumName: str
        # Declared nullable: the value may be ``None``.
        duration: float | None
        instrumental: bool
        plainLyrics: str
        # Declared nullable: the value may be ``None``.
        syncedLyrics: str | None
|
||||
|
||||
|
||||
class GeniusAPI:
    """Genius API data types.

    This documents *only* the fields that are used in the plugin.
    :attr:`SearchResult` is an exception: it lists additional fields that
    might be useful in the future.
    """

    class DateComponents(TypedDict):
        """Release date split into its numeric components."""

        year: int
        month: int
        day: int

    class Artist(TypedDict):
        """Artist entry referenced from a search result."""

        api_path: str
        header_image_url: str
        id: int
        image_url: str
        is_meme_verified: bool
        is_verified: bool
        name: str
        url: str

    class Stats(TypedDict):
        """Statistics attached to a search result."""

        unreviewed_annotations: int
        hot: bool

    class SearchResult(TypedDict):
        """Song data found under the ``result`` key of a search hit."""

        annotation_count: int
        api_path: str
        artist_names: str
        full_title: str
        header_image_thumbnail_url: str
        header_image_url: str
        id: int
        lyrics_owner_id: int
        lyrics_state: str
        path: str
        primary_artist_names: str
        pyongs_count: int | None
        relationships_index_url: str
        release_date_components: GeniusAPI.DateComponents
        release_date_for_display: str
        release_date_with_abbreviated_month_for_display: str
        song_art_image_thumbnail_url: str
        song_art_image_url: str
        stats: GeniusAPI.Stats
        title: str
        title_with_featured: str
        url: str
        featured_artists: list[GeniusAPI.Artist]
        primary_artist: GeniusAPI.Artist
        primary_artists: list[GeniusAPI.Artist]

    class SearchHit(TypedDict):
        """A single search hit wrapping a :class:`SearchResult`."""

        result: GeniusAPI.SearchResult

    class SearchResponse(TypedDict):
        """The ``response`` object holding the list of hits."""

        hits: list[GeniusAPI.SearchHit]

    class Search(TypedDict):
        """Top-level search JSON document."""

        response: GeniusAPI.SearchResponse
|
||||
|
||||
|
||||
class GoogleCustomSearchAPI:
    """Google Custom Search API data types."""

    class Response(TypedDict):
        """Search response from the Google Custom Search API.

        If the search returns no results, the :attr:`items` field is absent.
        """

        items: NotRequired[list[GoogleCustomSearchAPI.Item]]

    class Item(TypedDict):
        """A Google Custom Search API result item.

        :attr:`title` field is shown to the user in the search interface, thus
        it gets truncated with an ellipsis for longer queries. For most
        results, the full title is available as ``og:title`` metatag found
        under the :attr:`pagemap` field. Note neither this metatag nor the
        ``pagemap`` field is guaranteed to be present in the data.
        """

        title: str
        link: str
        pagemap: NotRequired[GoogleCustomSearchAPI.Pagemap]

    class Pagemap(TypedDict):
        """Pagemap data with a single meta tags dict in a list."""

        metatags: list[JSONDict]
|
||||
1079
beetsplug/lyrics.py
1079
beetsplug/lyrics.py
File diff suppressed because it is too large
Load diff
|
|
@ -11,6 +11,15 @@ been dropped.
|
|||
|
||||
New features:
|
||||
|
||||
* :doc:`plugins/lastgenre`: The new configuration option, ``keep_existing``,
|
||||
provides more fine-grained control over how pre-populated genre tags are
|
||||
handled. The ``force`` option now behaves in a more conventional manner.
|
||||
:bug:`4982`
|
||||
* :doc:`plugins/lyrics`: Add new configuration option ``dist_thresh`` to
|
||||
control the maximum allowed distance between the lyrics search result and the
|
||||
tagged item's artist and title. This is useful for preventing false positives
|
||||
when fetching lyrics.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* :doc:`plugins/lyrics`: LRCLib will fall back to plain lyrics if synced lyrics
|
||||
|
|
@ -55,10 +64,12 @@ Bug fixes:
|
|||
``lrclib`` over other sources since it returns reliable results quicker than
|
||||
others.
|
||||
:bug:`5102`
|
||||
* :doc:`plugins/lastgenre`: The new configuration option, ``keep_existing``,
|
||||
provides more fine-grained control over how pre-populated genre tags are
|
||||
handled. The ``force`` option now behaves in a more conventional manner.
|
||||
:bug:`4982`
|
||||
* :doc:`plugins/lyrics`: Fix the issue with ``genius`` backend not being able
|
||||
to match lyrics when there is a slight variation in the artist name.
|
||||
:bug:`4791`
|
||||
* :doc:`plugins/lyrics`: Fix plugin crash when ``genius`` backend returns empty
|
||||
lyrics.
|
||||
:bug:`5583`
|
||||
|
||||
For packagers:
|
||||
|
||||
|
|
|
|||
|
|
@ -2,25 +2,27 @@ Lyrics Plugin
|
|||
=============
|
||||
|
||||
The ``lyrics`` plugin fetches and stores song lyrics from databases on the Web.
|
||||
Namely, the current version of the plugin uses `Genius.com`_, `Tekstowo.pl`_, `LRCLIB`_
|
||||
and, optionally, the Google custom search API.
|
||||
Namely, the current version of the plugin uses `Genius.com`_, `Tekstowo.pl`_,
|
||||
`LRCLIB`_ and, optionally, the Google Custom Search API.
|
||||
|
||||
.. _Genius.com: https://genius.com/
|
||||
.. _Tekstowo.pl: https://www.tekstowo.pl/
|
||||
.. _LRCLIB: https://lrclib.net/
|
||||
|
||||
|
||||
Fetch Lyrics During Import
|
||||
--------------------------
|
||||
Install
|
||||
-------
|
||||
|
||||
To automatically fetch lyrics for songs you import, first enable it in your
|
||||
configuration (see :ref:`using-plugins`). Then, install ``beets`` with
|
||||
``lyrics`` extra
|
||||
First, enable the ``lyrics`` plugin in your configuration (see
|
||||
:ref:`using-plugins`). Then, install ``beets`` with the ``lyrics`` extra:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install "beets[lyrics]"
|
||||
|
||||
Fetch Lyrics During Import
|
||||
--------------------------
|
||||
|
||||
When importing new files, beets will now fetch lyrics for files that don't
|
||||
already have them. The lyrics will be stored in the beets database. If the
|
||||
``import.write`` config option is on, then the lyrics will also be written to
|
||||
|
|
@ -29,46 +31,52 @@ the files' tags.
|
|||
Configuration
|
||||
-------------
|
||||
|
||||
To configure the plugin, make a ``lyrics:`` section in your
|
||||
configuration file. The available options are:
|
||||
To configure the plugin, make a ``lyrics:`` section in your configuration file.
|
||||
Default configuration:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
lyrics:
|
||||
auto: yes
|
||||
bing_client_secret: null
|
||||
bing_lang_from: []
|
||||
bing_lang_to: null
|
||||
dist_thresh: 0.11
|
||||
fallback: null
|
||||
force: no
|
||||
google_API_key: null
|
||||
google_engine_ID: 009217259823014548361:lndtuqkycfu
|
||||
sources: [lrclib, google, genius, tekstowo]
|
||||
synced: no
|
||||
|
||||
The available options are:
|
||||
|
||||
- **auto**: Fetch lyrics automatically during import.
|
||||
Default: ``yes``.
|
||||
- **bing_client_secret**: Your Bing Translation application password
|
||||
(to :ref:`lyrics-translation`)
|
||||
(see :ref:`lyrics-translation`)
|
||||
- **bing_lang_from**: By default all lyrics with a language other than
|
||||
``bing_lang_to`` are translated. Use a list of lang codes to restrict the set
|
||||
of source languages to translate.
|
||||
Default: ``[]``
|
||||
- **bing_lang_to**: Language to translate lyrics into.
|
||||
Default: None.
|
||||
- **dist_thresh**: The maximum distance between the artist and title
|
||||
combination of the music file and lyrics candidate to consider them a match.
|
||||
Lower values will make the plugin more strict, higher values will make it
|
||||
more lenient. This does not apply to the ``lrclib`` backend as it matches
|
||||
durations.
|
||||
- **fallback**: By default, the file will be left unchanged when no lyrics are
|
||||
found. Use the empty string ``''`` to reset the lyrics in such a case.
|
||||
Default: None.
|
||||
- **force**: By default, beets won't fetch lyrics if the files already have
|
||||
them. To always fetch lyrics instead, set the ``force`` option to ``yes``.
|
||||
Default: ``no``.
|
||||
- **google_API_key**: Your Google API key (to enable the Google Custom Search
|
||||
backend).
|
||||
Default: None.
|
||||
- **google_engine_ID**: The custom search engine to use.
|
||||
Default: The `beets custom search engine`_, which gathers an updated list of
|
||||
sources known to be scrapeable.
|
||||
- **sources**: List of sources to search for lyrics. An asterisk ``*`` expands
|
||||
to all available sources.
|
||||
Default: ``lrclib google genius tekstowo``, i.e., all the available sources. The
|
||||
``google`` source will be automatically deactivated if no ``google_API_key``
|
||||
is setup.
|
||||
The ``google``, ``genius``, and ``tekstowo`` sources will only be enabled if
|
||||
BeautifulSoup is installed.
|
||||
- **synced**: Prefer synced lyrics over plain lyrics if a source offers them. Currently `lrclib` is the only source that provides them. Default: `no`.
|
||||
|
||||
Here's an example of ``config.yaml``::
|
||||
|
||||
lyrics:
|
||||
fallback: ''
|
||||
google_API_key: AZERTYUIOPQSDFGHJKLMWXCVBN1234567890_ab
|
||||
google_engine_ID: 009217259823014548361:lndtuqkycfu
|
||||
to all available sources. The ``google`` source will be automatically
|
||||
deactivated if no ``google_API_key`` is set up.
|
||||
- **synced**: Prefer synced lyrics over plain lyrics if a source offers them.
|
||||
Currently ``lrclib`` is the only source that provides them.
|
||||
|
||||
.. _beets custom search engine: https://www.google.com:443/cse/publicurl?cx=009217259823014548361:lndtuqkycfu
|
||||
|
||||
|
|
@ -83,74 +91,74 @@ by that band, and ``beet lyrics`` will get lyrics for my entire library. The
|
|||
lyrics will be added to the beets database and, if ``import.write`` is on,
|
||||
embedded into files' metadata.
|
||||
|
||||
The ``-p`` option to the ``lyrics`` command makes it print lyrics out to the
|
||||
console so you can view the fetched (or previously-stored) lyrics.
|
||||
The ``-p, --print`` option to the ``lyrics`` command makes it print lyrics out
|
||||
to the console so you can view the fetched (or previously-stored) lyrics.
|
||||
|
||||
The ``-f`` option forces the command to fetch lyrics, even for tracks that
|
||||
already have lyrics. Inversely, the ``-l`` option restricts operations
|
||||
to lyrics that are locally available, which show lyrics faster without using
|
||||
the network at all.
|
||||
The ``-f, --force`` option forces the command to fetch lyrics, even for tracks
|
||||
that already have lyrics.
|
||||
|
||||
Inversely, the ``-l, --local`` option restricts operations to lyrics that are
|
||||
locally available, which shows lyrics faster without using the network at all.
|
||||
|
||||
Rendering Lyrics into Other Formats
|
||||
-----------------------------------
|
||||
|
||||
The ``-r directory`` option renders all lyrics as `reStructuredText`_ (ReST)
|
||||
documents in ``directory`` (by default, the current directory). That
|
||||
directory, in turn, can be parsed by tools like `Sphinx`_ to generate HTML,
|
||||
ePUB, or PDF documents.
|
||||
The ``-r directory, --write-rest directory`` option renders all lyrics as
|
||||
`reStructuredText`_ (ReST) documents in ``directory`` (by default, the current
|
||||
directory). That directory, in turn, can be parsed by tools like `Sphinx`_ to
|
||||
generate HTML, ePUB, or PDF documents.
|
||||
|
||||
A minimal ``conf.py`` and ``index.rst`` files are created the first time the
|
||||
Minimal ``conf.py`` and ``index.rst`` files are created the first time the
|
||||
command is run. They are not overwritten on subsequent runs, so you can safely
|
||||
modify these files to customize the output.
|
||||
|
||||
Sphinx supports various `builders`_, see a few suggestions:
|
||||
|
||||
|
||||
.. admonition:: Build an HTML version
|
||||
|
||||
::
|
||||
|
||||
sphinx-build -b html . _build/html
|
||||
|
||||
.. admonition:: Build an ePUB3 formatted file, usable on ebook readers
|
||||
|
||||
::
|
||||
|
||||
sphinx-build -b epub3 . _build/epub
|
||||
|
||||
.. admonition:: Build a PDF file, which incidentally also builds a LaTeX file
|
||||
|
||||
::
|
||||
|
||||
sphinx-build -b latex %s _build/latex && make -C _build/latex all-pdf
|
||||
|
||||
|
||||
.. _Sphinx: https://www.sphinx-doc.org/
|
||||
.. _reStructuredText: http://docutils.sourceforge.net/rst.html
|
||||
|
||||
Sphinx supports various `builders
|
||||
<https://www.sphinx-doc.org/en/stable/builders.html>`_, but here are a
|
||||
few suggestions.
|
||||
|
||||
* Build an HTML version::
|
||||
|
||||
sphinx-build -b html . _build/html
|
||||
|
||||
* Build an ePUB3 formatted file, usable on ebook readers::
|
||||
|
||||
sphinx-build -b epub3 . _build/epub
|
||||
|
||||
* Build a PDF file, which incidentally also builds a LaTeX file::
|
||||
|
||||
sphinx-build -b latex %s _build/latex && make -C _build/latex all-pdf
|
||||
|
||||
.. _activate-google-custom-search:
|
||||
.. _builders: https://www.sphinx-doc.org/en/stable/builders.html
|
||||
|
||||
Activate Google Custom Search
|
||||
------------------------------
|
||||
|
||||
You need to `register for a Google API key`_. Set the ``google_API_key``
|
||||
configuration option to your key.
|
||||
|
||||
Then add ``google`` to the list of sources in your configuration (or use
|
||||
the default list, which includes it as long as you have an API key).
|
||||
If you use the default ``google_engine_ID``, we recommend limiting the sources to
|
||||
``google`` as the other sources are already included in the Google results.
|
||||
|
||||
.. _register for a Google API key: https://console.developers.google.com/
|
||||
|
||||
Optionally, you can `define a custom search engine`_. Get your search engine's
|
||||
token and use it for your ``google_engine_ID`` configuration option. By
|
||||
default, beets uses a list of sources known to be scrapeable.
|
||||
|
||||
.. _define a custom search engine: https://www.google.com/cse/all
|
||||
|
||||
Note that the Google custom search API is limited to 100 queries per day.
|
||||
After that, the lyrics plugin will fall back on other declared data sources.
|
||||
|
||||
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
.. _register for a Google API key: https://console.developers.google.com/
|
||||
.. _define a custom search engine: https://www.google.com/cse/all
|
||||
|
||||
Activate Genius and Tekstowo.pl Lyrics
|
||||
--------------------------------------
|
||||
|
||||
These backends are enabled by default.
|
||||
|
||||
.. _lyrics-translation:
|
||||
|
||||
|
|
@ -161,6 +169,6 @@ You need to register for a Microsoft Azure Marketplace free account and
|
|||
to the `Microsoft Translator API`_. Follow the four-step process, specifically
|
||||
at step 3 enter ``beets`` as *Client ID* and copy/paste the generated
|
||||
*Client secret* into your ``bing_client_secret`` configuration, alongside
|
||||
``bing_lang_to`` target `language code`.
|
||||
``bing_lang_to`` target ``language code``.
|
||||
|
||||
.. _Microsoft Translator API: https://docs.microsoft.com/en-us/azure/cognitive-services/translator/translator-how-to-signup
|
||||
|
|
|
|||
|
|
@ -21,8 +21,8 @@ omit = beets/test/*
|
|||
precision = 2
|
||||
skip_empty = true
|
||||
show_missing = true
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
exclude_also =
|
||||
@atexit.register
|
||||
if TYPE_CHECKING
|
||||
if typing.TYPE_CHECKING
|
||||
raise AssertionError
|
||||
|
|
|
|||
|
|
@ -147,6 +147,27 @@ lyrics_pages = [
|
|||
""",
|
||||
url_title="The Beatles Lady Madonna lyrics",
|
||||
),
|
||||
LyricsPage.make(
|
||||
"https://www.dainuzodziai.lt/m/mergaites-nori-mylet-atlanta/",
|
||||
"""
|
||||
Jos nesuspėja skriet paskui vėją
|
||||
Bangos į krantą grąžina jas vėl
|
||||
Jos karštą saulę paliesti norėjo
|
||||
Ant kranto palikę visas negandas
|
||||
|
||||
Bet jos nori mylėt
|
||||
Jos nenori liūdėt
|
||||
Leisk mergaitėms mylėt
|
||||
Kaip jos moka mylėt
|
||||
Koks vakaras šiltas ir nieko nestinga
|
||||
Veidus apšviečia žaisminga šviesa
|
||||
Jos buvo laimingos prie jūros kur liko
|
||||
Tik vėjas išmokęs visas jų dainas
|
||||
""",
|
||||
artist="Atlanta",
|
||||
track_title="Mergaitės Nori Mylėt",
|
||||
url_title="Mergaitės nori mylėt – Atlanta | Dainų Žodžiai",
|
||||
),
|
||||
LyricsPage.make(
|
||||
"https://genius.com/The-beatles-lady-madonna-lyrics",
|
||||
"""
|
||||
|
|
@ -223,6 +244,20 @@ lyrics_pages = [
|
|||
Mademoiselle Madonna, couchée sur votre lit
|
||||
Listen to the music playing in your head.
|
||||
Vous écoutez la musique qui joue dans votre tête
|
||||
|
||||
Tuesday afternoon is never ending.
|
||||
Le mardi après-midi n'en finit pas
|
||||
Wednesday morning papers didn't come.
|
||||
Le mercredi matin les journaux ne sont pas arrivés
|
||||
Thursday night you stockings needed mending.
|
||||
Jeudi soir, vos bas avaient besoin d'être réparés
|
||||
See how they run.
|
||||
Regardez comme ils filent
|
||||
|
||||
Lady Madonna, children at your feet.
|
||||
Mademoiselle Madonna, les enfants à vos pieds
|
||||
Wonder how you manage to make ends meet.
|
||||
Je me demande comment vous vous débrouillez pour joindre les deux bouts
|
||||
""",
|
||||
url_title="Paroles et traduction The Beatles : Lady Madonna - paroles de chanson", # noqa: E501
|
||||
),
|
||||
|
|
@ -235,29 +270,35 @@ lyrics_pages = [
|
|||
Children at your feet
|
||||
Wonder how you manage
|
||||
To make ends meet
|
||||
|
||||
Who finds the money
|
||||
When you pay the rent?
|
||||
Did you think that money
|
||||
Was Heaven sent?
|
||||
|
||||
Friday night arrives without a suitcase
|
||||
Sunday morning creeping like a nun
|
||||
Monday's child has learned
|
||||
To tie his bootlace
|
||||
See how they run
|
||||
|
||||
Lady Madonna
|
||||
Baby at your breast
|
||||
Wonders how you manage
|
||||
To feed the rest
|
||||
See how they run
|
||||
|
||||
Lady Madonna
|
||||
Lying on the bed
|
||||
Listen to the music
|
||||
Playing in your head
|
||||
|
||||
Tuesday afternoon is neverending
|
||||
Wednesday morning papers didn't come
|
||||
Thursday night your stockings
|
||||
Needed mending
|
||||
See how they run
|
||||
|
||||
Lady Madonna
|
||||
Children at your feet
|
||||
Wonder how you manage
|
||||
|
|
@ -415,15 +456,29 @@ lyrics_pages = [
|
|||
LyricsPage.make(
|
||||
"https://www.musica.com/letras.asp?letra=59862",
|
||||
"""
|
||||
Lady Madonna, children at your feet
|
||||
Wonder how you manage to make ends meet
|
||||
Who finds the money when you pay the rent?
|
||||
Did you think that money was heaven sent?
|
||||
|
||||
Friday night arrives without a suitcase
|
||||
Sunday morning creeping like a nun
|
||||
Monday's child has learned to tie his bootlace
|
||||
See how they run
|
||||
|
||||
Lady Madonna, baby at your breast
|
||||
Wonders how you manage to feed the rest
|
||||
|
||||
See how they run
|
||||
|
||||
Lady Madonna lying on the bed
|
||||
Listen to the music playing in your head
|
||||
|
||||
Tuesday afternoon is never ending
|
||||
Wednesday morning papers didn't come
|
||||
Thursday night your stockings needed mending
|
||||
See how they run
|
||||
|
||||
Lady Madonna, children at your feet
|
||||
Wonder how you manage to make ends meet
|
||||
""",
|
||||
|
|
@ -448,6 +503,14 @@ lyrics_pages = [
|
|||
See how they run.
|
||||
Lady Madonna, lying on the bed,
|
||||
Listen to the music playing in your head.
|
||||
|
||||
Tuesday afternoon is never ending.
|
||||
Wednesday morning papers didn't come.
|
||||
Thursday night your stockings needed mending.
|
||||
See how they run.
|
||||
|
||||
Lady Madonna, children at your feet.
|
||||
Wonder how you manage to make ends meet.
|
||||
""",
|
||||
url_title="Paroles Lady Madonna par The Beatles - Lyrics - Paroles.net",
|
||||
),
|
||||
|
|
@ -480,6 +543,7 @@ lyrics_pages = [
|
|||
Wonder how you manage to make ends meet
|
||||
""",
|
||||
url_title="THE BEATLES - LADY MADONNA LYRICS",
|
||||
marks=[xfail_on_ci("Songlyrics is blocked by Cloudflare")],
|
||||
),
|
||||
LyricsPage.make(
|
||||
"https://sweetslyrics.com/the-beatles/lady-madonna-lyrics",
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ class TestLyricsUtils:
|
|||
@pytest.mark.parametrize(
|
||||
"artist, title",
|
||||
[
|
||||
("Various Artists", "Title"),
|
||||
("Artist", ""),
|
||||
("", "Title"),
|
||||
(" ", ""),
|
||||
|
|
@ -81,7 +82,7 @@ class TestLyricsUtils:
|
|||
@pytest.mark.parametrize(
|
||||
"title, expected_extra_titles",
|
||||
[
|
||||
("1/2", ["1", "2"]),
|
||||
("1/2", []),
|
||||
("1 / 2", ["1", "2"]),
|
||||
("Song (live)", ["Song"]),
|
||||
("Song (live) (new)", ["Song"]),
|
||||
|
|
@ -101,47 +102,6 @@ class TestLyricsUtils:
|
|||
|
||||
assert list(actual_titles) == [title, *expected_extra_titles]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"initial_lyrics, expected",
|
||||
[
|
||||
("Verse\nLyrics credit in the last line", "Verse"),
|
||||
("Lyrics credit in the first line\nVerse", "Verse"),
|
||||
(
|
||||
"""Verse
|
||||
Lyrics mentioned somewhere in the middle
|
||||
Verse""",
|
||||
"""Verse
|
||||
Lyrics mentioned somewhere in the middle
|
||||
Verse""",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_remove_credits(self, initial_lyrics, expected):
|
||||
assert lyrics.remove_credits(initial_lyrics) == expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"initial_text, expected",
|
||||
[
|
||||
(
|
||||
"""<!--lyrics below-->
|
||||
one
|
||||
<br class='myclass'>
|
||||
two !
|
||||
<br><br \\>
|
||||
<blink>four</blink>""",
|
||||
"one\ntwo !\n\nfour",
|
||||
),
|
||||
("foo<script>bar</script>baz", "foobaz"),
|
||||
("foo<!--<bar>-->qux", "fooqux"),
|
||||
],
|
||||
)
|
||||
def test_scrape_strip_cruft(self, initial_text, expected):
|
||||
assert lyrics._scrape_strip_cruft(initial_text, True) == expected
|
||||
|
||||
def test_scrape_merge_paragraphs(self):
|
||||
text = "one</p> <p class='myclass'>two</p><p>three"
|
||||
assert lyrics._scrape_merge_paragraphs(text) == "one\ntwo\nthree"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text, expected",
|
||||
[
|
||||
|
|
@ -161,12 +121,67 @@ class TestLyricsUtils:
|
|||
assert lyrics.slug(text) == expected
|
||||
|
||||
|
||||
class TestHtml:
|
||||
def test_scrape_strip_cruft(self):
|
||||
initial = """<!--lyrics below-->
|
||||
one
|
||||
<br class='myclass'>
|
||||
two !
|
||||
<br><br \\>
|
||||
<blink>four</blink>"""
|
||||
expected = "<!--lyrics below-->\none\ntwo !\n\n<blink>four</blink>"
|
||||
|
||||
assert lyrics.Html.normalize_space(initial) == expected
|
||||
|
||||
def test_scrape_merge_paragraphs(self):
|
||||
text = "one</p> <p class='myclass'>two</p><p>three"
|
||||
expected = "one\ntwo\n\nthree"
|
||||
|
||||
assert lyrics.Html.merge_paragraphs(text) == expected
|
||||
|
||||
|
||||
class TestSearchBackend:
    """Check search result matching against the configured ``dist_thresh``."""

    @pytest.fixture
    def backend(self, dist_thresh):
        """Return a search backend whose config uses the given ``dist_thresh``."""
        plugin = lyrics.LyricsPlugin()
        plugin.config.set({"dist_thresh": dist_thresh})
        return lyrics.SearchBackend(plugin.config, plugin._log)

    @pytest.mark.parametrize(
        "dist_thresh, target_artist, artist, should_match",
        [
            (0.11, "Target Artist", "Target Artist", True),
            (0.11, "Target Artist", "Target Artis", True),
            (0.11, "Target Artist", "Target Arti", False),
            (0.11, "Psychonaut", "Psychonaut (BEL)", True),
            (0.11, "beets song", "beats song", True),
            (0.10, "beets song", "beats song", False),
            (
                0.11,
                "Lucid Dreams (Forget Me)",
                "Lucid Dreams (Remix) ft. Lil Uzi Vert",
                False,
            ),
            (
                0.12,
                "Lucid Dreams (Forget Me)",
                "Lucid Dreams (Remix) ft. Lil Uzi Vert",
                True,
            ),
        ],
    )
    def test_check_match(self, backend, target_artist, artist, should_match):
        """A result matches only when it is within the configured threshold."""
        # Titles are left empty on both sides so only the artist varies.
        result = lyrics.SearchResult(artist, "", "")

        assert backend.check_match(target_artist, "", result) == should_match
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def lyrics_root_dir(pytestconfig: pytest.Config):
    """Return the ``test/rsrc/lyrics`` directory under the pytest root path."""
    return pytestconfig.rootpath / "test" / "rsrc" / "lyrics"
|
||||
|
||||
|
||||
class LyricsBackendTest(PluginMixin):
|
||||
class LyricsPluginMixin(PluginMixin):
|
||||
plugin = "lyrics"
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -182,6 +197,42 @@ class LyricsBackendTest(PluginMixin):
|
|||
|
||||
return lyrics.LyricsPlugin()
|
||||
|
||||
|
||||
class TestLyricsPlugin(LyricsPluginMixin):
    """Plugin-level tests that run against the ``lrclib`` backend."""

    @pytest.fixture
    def backend_name(self):
        """Return the name of the lyrics backend to test."""
        return "lrclib"

    @pytest.mark.parametrize(
        "request_kwargs, expected_log_match",
        [
            (
                {"status_code": HTTPStatus.BAD_GATEWAY},
                r"LRCLib: Request error: 502",
            ),
            ({"text": "invalid"}, r"LRCLib: Could not decode.*JSON"),
        ],
    )
    def test_error_handling(
        self,
        requests_mock,
        lyrics_plugin,
        caplog,
        request_kwargs,
        expected_log_match,
    ):
        """Errors are logged with the backend name."""
        requests_mock.get(lyrics.LRCLib.SEARCH_URL, **request_kwargs)

        # A failed request yields no lyrics rather than raising.
        assert lyrics_plugin.get_lyrics("", "", "", 0.0) is None
        assert caplog.messages
        last_log = caplog.messages[-1]
        assert last_log
        assert re.search(expected_log_match, last_log, re.I)
|
||||
|
||||
|
||||
class LyricsBackendTest(LyricsPluginMixin):
|
||||
@pytest.fixture
|
||||
def backend(self, lyrics_plugin):
|
||||
"""Return a lyrics backend instance."""
|
||||
|
|
@ -229,24 +280,23 @@ class TestLyricsSources(LyricsBackendTest):
|
|||
|
||||
def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage):
|
||||
"""Test parsed lyrics from each of the configured lyrics pages."""
|
||||
lyrics = lyrics_plugin.get_lyrics(
|
||||
lyrics_info = lyrics_plugin.get_lyrics(
|
||||
lyrics_page.artist, lyrics_page.track_title, "", 186
|
||||
)
|
||||
|
||||
assert lyrics
|
||||
assert lyrics_info
|
||||
lyrics, _ = lyrics_info.split("\n\nSource: ")
|
||||
assert lyrics == lyrics_page.lyrics
|
||||
|
||||
|
||||
class TestGoogleLyrics(LyricsBackendTest):
|
||||
"""Test scraping heuristics on a fake html page."""
|
||||
|
||||
TITLE = "Beets song"
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
def backend_name(self):
|
||||
return "google"
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
@pytest.fixture
|
||||
def plugin_config(self):
|
||||
return {"google_API_key": "test"}
|
||||
|
||||
|
|
@ -254,54 +304,59 @@ class TestGoogleLyrics(LyricsBackendTest):
|
|||
def file_name(self):
|
||||
return "examplecom/beetssong"
|
||||
|
||||
@pytest.fixture
|
||||
def search_item(self, url_title, url):
|
||||
return {"title": url_title, "link": url}
|
||||
|
||||
@pytest.mark.parametrize("plugin_config", [{}])
|
||||
def test_disabled_without_api_key(self, lyrics_plugin):
|
||||
assert not lyrics_plugin.backends
|
||||
|
||||
def test_mocked_source_ok(self, backend, lyrics_html):
|
||||
"""Test that lyrics of the mocked page are correctly scraped"""
|
||||
result = lyrics.scrape_lyrics_from_html(lyrics_html).lower()
|
||||
result = backend.scrape(lyrics_html).lower()
|
||||
|
||||
assert result
|
||||
assert backend.is_lyrics(result)
|
||||
assert PHRASE_BY_TITLE[self.TITLE] in result
|
||||
assert PHRASE_BY_TITLE["Beets song"] in result
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"url_title, artist, should_be_candidate",
|
||||
"url_title, expected_artist, expected_title",
|
||||
[
|
||||
("John Doe - beets song Lyrics", "John Doe", True),
|
||||
("example.com | Beats song by John doe", "John Doe", True),
|
||||
("example.com | seets bong lyrics by John doe", "John Doe", False),
|
||||
("foo", "Sun O)))", False),
|
||||
("Artist - beets song Lyrics", "Artist", "beets song"),
|
||||
("www.azlyrics.com | Beats song by Artist", "Artist", "Beats song"),
|
||||
("lyric.com | seets bong lyrics by Artist", "Artist", "seets bong"),
|
||||
("foo", "", "foo"),
|
||||
("Artist - Beets Song lyrics | AZLyrics", "Artist", "Beets Song"),
|
||||
("Letra de Artist - Beets Song", "Artist", "Beets Song"),
|
||||
("Letra de Artist - Beets ...", "Artist", "Beets"),
|
||||
("Artist Beets Song", "Artist", "Beets Song"),
|
||||
("BeetsSong - Artist", "Artist", "BeetsSong"),
|
||||
("Artist - BeetsSong", "Artist", "BeetsSong"),
|
||||
("Beets Song", "", "Beets Song"),
|
||||
("Beets Song Artist", "Artist", "Beets Song"),
|
||||
(
|
||||
"BeetsSong (feat. Other & Another) - Artist",
|
||||
"Artist",
|
||||
"BeetsSong (feat. Other & Another)",
|
||||
),
|
||||
(
|
||||
(
|
||||
"Beets song lyrics by Artist - original song full text. "
|
||||
"Official Beets song lyrics, 2024 version | LyricsMode.com"
|
||||
),
|
||||
"Artist",
|
||||
"Beets song",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_is_page_candidate(
|
||||
self, backend, lyrics_html, url_title, artist, should_be_candidate
|
||||
@pytest.mark.parametrize("url", ["http://doesntmatter.com"])
|
||||
def test_make_search_result(
|
||||
self, backend, search_item, expected_artist, expected_title
|
||||
):
|
||||
result = backend.is_page_candidate(
|
||||
"http://www.example.com/lyrics/beetssong",
|
||||
url_title,
|
||||
self.TITLE,
|
||||
artist,
|
||||
)
|
||||
assert bool(result) == should_be_candidate
|
||||
result = backend.make_search_result("Artist", "Beets song", search_item)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"lyrics",
|
||||
[
|
||||
"LyricsMania.com - Copyright (c) 2013 - All Rights Reserved",
|
||||
"""All material found on this site is property\n
|
||||
of mywickedsongtext brand""",
|
||||
"""
|
||||
Lyricsmania staff is working hard for you to add $TITLE lyrics as soon
|
||||
as they'll be released by $ARTIST, check back soon!
|
||||
In case you have the lyrics to $TITLE and want to send them to us, fill out
|
||||
the following form.
|
||||
""",
|
||||
],
|
||||
)
|
||||
def test_bad_lyrics(self, backend, lyrics):
|
||||
assert not backend.is_lyrics(lyrics)
|
||||
|
||||
def test_slugify(self, backend):
|
||||
text = "http://site.com/\xe7afe-au_lait(boisson)"
|
||||
assert backend.slugify(text) == "http://site.com/cafe_au_lait"
|
||||
assert result.artist == expected_artist
|
||||
assert result.title == expected_title
|
||||
|
||||
|
||||
class TestGeniusLyrics(LyricsBackendTest):
|
||||
|
|
@@ -312,13 +367,13 @@ class TestGeniusLyrics(LyricsBackendTest):
|
|||
@pytest.mark.parametrize(
|
||||
"file_name, expected_line_count",
|
||||
[
|
||||
("geniuscom/2pacalleyezonmelyrics", 134),
|
||||
("geniuscom/2pacalleyezonmelyrics", 131),
|
||||
("geniuscom/Ttngchinchillalyrics", 29),
|
||||
("geniuscom/sample", 0), # see https://github.com/beetbox/beets/issues/3535
|
||||
],
|
||||
) # fmt: skip
|
||||
def test_scrape(self, backend, lyrics_html, expected_line_count):
|
||||
result = backend._scrape_lyrics_from_html(lyrics_html) or ""
|
||||
result = backend.scrape(lyrics_html) or ""
|
||||
|
||||
assert len(result.splitlines()) == expected_line_count
|
||||
|
||||
|
|
@@ -339,7 +394,7 @@ class TestTekstowoLyrics(LyricsBackendTest):
|
|||
],
|
||||
)
|
||||
def test_scrape(self, backend, lyrics_html, expecting_lyrics):
|
||||
assert bool(backend.extract_lyrics(lyrics_html)) == expecting_lyrics
|
||||
assert bool(backend.scrape(lyrics_html)) == expecting_lyrics
|
||||
|
||||
|
||||
LYRICS_DURATION = 950
|
||||
|
|
@@ -347,6 +402,7 @@ LYRICS_DURATION = 950
|
|||
|
||||
def lyrics_match(**overrides):
|
||||
return {
|
||||
"id": 1,
|
||||
"instrumental": False,
|
||||
"duration": LYRICS_DURATION,
|
||||
"syncedLyrics": "synced",
|
||||
|
|
@@ -363,13 +419,9 @@ class TestLRCLibLyrics(LyricsBackendTest):
|
|||
return "lrclib"
|
||||
|
||||
@pytest.fixture
|
||||
def request_kwargs(self, response_data):
|
||||
return {"json": response_data}
|
||||
|
||||
@pytest.fixture
|
||||
def fetch_lyrics(self, backend, requests_mock, request_kwargs):
|
||||
def fetch_lyrics(self, backend, requests_mock, response_data):
|
||||
requests_mock.get(backend.GET_URL, status_code=HTTPStatus.NOT_FOUND)
|
||||
requests_mock.get(backend.SEARCH_URL, **request_kwargs)
|
||||
requests_mock.get(backend.SEARCH_URL, json=response_data)
|
||||
|
||||
return partial(backend.fetch, "la", "la", "la", self.ITEM_DURATION)
|
||||
|
||||
|
|
@@ -379,7 +431,9 @@ class TestLRCLibLyrics(LyricsBackendTest):
|
|||
[({"synced": True}, "synced"), ({"synced": False}, "plain")],
|
||||
)
|
||||
def test_synced_config_option(self, fetch_lyrics, expected_lyrics):
|
||||
assert fetch_lyrics() == expected_lyrics
|
||||
lyrics, _ = fetch_lyrics()
|
||||
|
||||
assert lyrics == expected_lyrics
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"response_data, expected_lyrics",
|
||||
|
|
@@ -441,20 +495,10 @@ class TestLRCLibLyrics(LyricsBackendTest):
|
|||
)
|
||||
@pytest.mark.parametrize("plugin_config", [{"synced": True}])
|
||||
def test_fetch_lyrics(self, fetch_lyrics, expected_lyrics):
|
||||
assert fetch_lyrics() == expected_lyrics
|
||||
lyrics_info = fetch_lyrics()
|
||||
if lyrics_info is None:
|
||||
assert expected_lyrics is None
|
||||
else:
|
||||
# Unpack the result already fetched (and None-checked) above rather than
# calling the backend a second time.
lyrics, _ = lyrics_info
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"request_kwargs, expected_log_match",
|
||||
[
|
||||
(
|
||||
{"status_code": HTTPStatus.BAD_GATEWAY},
|
||||
r"LRCLib: Request error: 502",
|
||||
),
|
||||
({"text": "invalid"}, r"LRCLib: Could not decode.*JSON"),
|
||||
],
|
||||
)
|
||||
def test_error(self, caplog, fetch_lyrics, expected_log_match):
|
||||
assert fetch_lyrics() is None
|
||||
assert caplog.messages
|
||||
assert (last_log := caplog.messages[-1])
|
||||
assert re.search(expected_log_match, last_log, re.I)
|
||||
assert lyrics == expected_lyrics
|
||||
|
|
|
|||
Loading…
Reference in a new issue