From 24ca6abcfe204b8d8be45c7d3e46e50758b7554d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 23 Feb 2026 07:53:18 +0000 Subject: [PATCH 01/12] lyrics: validate synced lyrics duration --- beetsplug/lyrics.py | 22 ++++++++++++++++++++-- docs/changelog.rst | 3 +++ test/plugins/test_lyrics.py | 31 ++++++++++++++++++++++--------- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 72df907db..b5bed18d3 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -262,17 +262,35 @@ class LRCLyrics: """Compare two lyrics items by their score.""" return self.dist < other.dist + @classmethod + def verify_synced_lyrics( + cls, duration: float, lyrics: str | None + ) -> str | None: + if lyrics and ( + m := Translator.LINE_PARTS_RE.match(lyrics.splitlines()[-1]) + ): + ts, _ = m.groups() + if ts: + mm, ss = map(float, ts.strip("[]").split(":")) + if 60 * mm + ss <= duration: + return lyrics + + return None + @classmethod def make( cls, candidate: LRCLibAPI.Item, target_duration: float ) -> LRCLyrics: + duration = candidate["duration"] or 0.0 return cls( target_duration, candidate["id"], - candidate["duration"] or 0.0, + duration, candidate["instrumental"], candidate["plainLyrics"], - candidate["syncedLyrics"], + cls.verify_synced_lyrics( + target_duration, candidate["syncedLyrics"] + ), ) @cached_property diff --git a/docs/changelog.rst b/docs/changelog.rst index 7318057b3..3bf959374 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -84,6 +84,9 @@ Other changes Since genres are now stored as a list in the ``genres`` field and written to files as individual genre tags, this option has no effect and has been removed. +- :doc:`plugins/lyrics`: To cut down noise from the ``lrclib`` lyrics source, + synced lyrics are now checked to ensure the final verse falls within the + track's duration. 2.6.2 (February 22, 2026) ------------------------- diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 376f0b9f2..9c7ee02c9 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -448,7 +448,7 @@ def lyrics_match(**overrides): "id": 1, "instrumental": False, "duration": LYRICS_DURATION, - "syncedLyrics": "synced", + "syncedLyrics": "[00:00.00] synced", "plainLyrics": "plain", **overrides, } @@ -456,6 +456,7 @@ def lyrics_match(**overrides): class TestLRCLibLyrics(LyricsBackendTest): ITEM_DURATION = 999 + SYNCED = "[00:00.00] synced" @pytest.fixture(scope="class") def backend_name(self): @@ -471,11 +472,13 @@ class TestLRCLibLyrics(LyricsBackendTest): @pytest.mark.parametrize("response_data", [[lyrics_match()]]) @pytest.mark.parametrize( "plugin_config, expected_lyrics", - [({"synced": True}, "synced"), ({"synced": False}, "plain")], + [({"synced": True}, SYNCED), ({"synced": False}, "plain")], ) def test_synced_config_option(self, fetch_lyrics, expected_lyrics): - lyrics, _ = fetch_lyrics() + lyrics_info = fetch_lyrics() + assert lyrics_info + lyrics, _ = lyrics_info assert lyrics == expected_lyrics @pytest.mark.parametrize( @@ -484,7 +487,7 @@ class TestLRCLibLyrics(LyricsBackendTest): pytest.param([], None, id="handle non-matching lyrics"), pytest.param( [lyrics_match()], - "synced", + SYNCED, id="synced when available", ), pytest.param( @@ -509,9 +512,9 @@ class TestLRCLibLyrics(LyricsBackendTest): syncedLyrics=None, plainLyrics="plain with closer duration", ), - lyrics_match(syncedLyrics="synced", plainLyrics="plain 2"), + lyrics_match(syncedLyrics=SYNCED, plainLyrics="plain 2"), ], - "synced", + SYNCED, id="prefer synced lyrics even if plain duration is closer", ), pytest.param( @@ -529,9 +532,18 @@ class TestLRCLibLyrics(LyricsBackendTest): "valid plain", id="ignore synced with invalid duration", ), + pytest.param( + [ + lyrics_match( + duration=59, syncedLyrics="[01:00.00] invalid synced" + ) + ], + None, + id="ignore synced with a timestamp longer than duration", + ), pytest.param( [lyrics_match(syncedLyrics=None), lyrics_match()], - "synced", + SYNCED, id="prefer match with synced lyrics", ), ], @@ -539,9 +551,10 @@ class TestLRCLibLyrics(LyricsBackendTest): @pytest.mark.parametrize("plugin_config", [{"synced": True}]) def test_fetch_lyrics(self, fetch_lyrics, expected_lyrics): lyrics_info = fetch_lyrics() - if lyrics_info is None: - assert expected_lyrics is None + if expected_lyrics is None: + assert not lyrics_info else: + assert lyrics_info lyrics, _ = fetch_lyrics() assert lyrics == expected_lyrics From 82bfc034949b7b7ccb54867c4fe080e0ece8968d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 23 Feb 2026 08:33:17 +0000 Subject: [PATCH 02/12] Preserve synced lyrics when fetched result is plain text When lyrics.synced is enabled, avoid replacing existing synced lyrics with newly fetched unsynced lyrics, even with force enabled. Allow replacement when the new lyrics are also synced, or when synced mode is disabled. --- beetsplug/lyrics.py | 9 +++++++- docs/changelog.rst | 3 +++ docs/plugins/lyrics.rst | 4 +++- test/plugins/test_lyrics.py | 45 +++++++++++++++++++++++++++++-------- 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index b5bed18d3..c71ee24d0 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -58,6 +58,7 @@ if TYPE_CHECKING: ) INSTRUMENTAL_LYRICS = "[Instrumental]" +SYNCED_LYRICS_PAT = re.compile(r"\[\d\d:\d\d.\d\d\]") class CaptchaError(requests.exceptions.HTTPError): @@ -1107,7 +1108,13 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin): self.info("🔴 Lyrics not found: {}", item) lyrics = self.config["fallback"].get() - if lyrics not in {None, item.lyrics}: + has_new_lyrics = lyrics not in {None, item.lyrics} + synced_mode = self.config["synced"].get(bool) + existing_synced = bool(SYNCED_LYRICS_PAT.search(item.lyrics)) + new_synced = bool(SYNCED_LYRICS_PAT.search(lyrics or "")) + if has_new_lyrics and not ( + synced_mode and existing_synced and not new_synced + ): item.lyrics = lyrics if write: item.try_write() diff --git a/docs/changelog.rst b/docs/changelog.rst index 3bf959374..3c3fb0d51 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -38,6 +38,9 @@ New features 3. Comma followed by a space 4. Slash wrapped by spaces +- :doc:`plugins/lyrics`: With ``synced`` enabled, existing synced lyrics are no + longer replaced by newly fetched plain lyrics, even when ``force`` is enabled. + Bug fixes ~~~~~~~~~ diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 33aa9b61e..3ebd9fc17 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -83,7 +83,9 @@ The available options are: deactivated if no ``google_API_key`` is setup. By default, ``musixmatch`` and ``tekstowo`` are excluded because they block the beets User-Agent. - **synced**: Prefer synced lyrics over plain lyrics if a source offers them. - Currently ``lrclib`` is the only source that provides them. + Currently ``lrclib`` is the only source that provides them. Using this option, + existing synced lyrics are not replaced by newly fetched plain lyrics (even + when ``force`` is enabled). To allow that replacement, disable ``synced``. .. _beets custom search engine: https://www.google.com:443/cse/publicurl?cx=009217259823014548361:lndtuqkycfu diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 9c7ee02c9..a72468cef 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -249,21 +249,48 @@ class TestLyricsPlugin(LyricsPluginMixin): assert re.search(expected_log_match, last_log, re.I) @pytest.mark.parametrize( - "plugin_config, found, expected", + "plugin_config, old_lyrics, found, expected", [ - ({}, "new", "old"), - ({"force": True}, "new", "new"), - ({"force": True, "local": True}, "new", "old"), - ({"force": True, "fallback": None}, "", "old"), - ({"force": True, "fallback": ""}, "", ""), - ({"force": True, "fallback": "default"}, "", "default"), + ({}, "old", "new", "old"), + ({"force": True}, "old", "new", "new"), + ({"force": True, "local": True}, "old", "new", "old"), + ({"force": True, "fallback": None}, "old", "", "old"), + ({"force": True, "fallback": ""}, "old", "", ""), + ({"force": True, "fallback": "default"}, "old", "", "default"), + pytest.param( + {"force": True, "synced": True}, + "[00:00.00] old synced", + "new plain", + "[00:00.00] old synced", + id="keep-existing-synced-lyrics", + ), + pytest.param( + {"force": True, "synced": True}, + "[00:00.00] old synced", + "[00:00.00] new synced", + "[00:00.00] new synced", + id="replace-with-new-synced-lyrics", + ), + pytest.param( + {"force": True, "synced": False}, + "[00:00.00] old synced", + "new plain", + "new plain", + id="replace-with-unsynced-lyrics-when-disabled", + ), ], ) def test_overwrite_config( - self, monkeypatch, helper, lyrics_plugin, found, expected + self, + monkeypatch, + helper, + lyrics_plugin, + old_lyrics, + found, + expected, ): monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: found) - item = helper.create_item(id=1, lyrics="old") + item = helper.create_item(id=1, lyrics=old_lyrics) lyrics_plugin.add_item_lyrics(item, False) From cd95c15a0b14ad0191fe61bddd151749d97105f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 23 Feb 2026 08:42:08 +0000 Subject: [PATCH 03/12] Add a test to show duplicate translations we have right now --- test/plugins/test_lyrics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index a72468cef..155a80d4b 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -597,6 +597,7 @@ class TestTranslation: " | [Refrain : Doja Cat]" " | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 " | Mon corps ne me laissait pas le cacher (Cachez-le)" + " | [Chorus]" " | Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 " | Chevauchant à travers le tonnerre, la foudre" ) @@ -630,6 +631,7 @@ class TestTranslation: [Refrain: Doja Cat] Hard for me to let you go (Let you go, let you go) My body wouldn't let me hide it (Hide it) + [Chorus] No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) Ridin' through the thunder, lightnin'""", "", @@ -637,6 +639,7 @@ class TestTranslation: [Refrain: Doja Cat] / [Refrain : Doja Cat] Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) My body wouldn't let me hide it (Hide it) / Mon corps ne me laissait pas le cacher (Cachez-le) + [Chorus] / [Chorus] No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) / Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas) Ridin' through the thunder, lightnin' / Chevauchant à travers le tonnerre, la foudre""", # noqa: E501 id="plain", From c2392751931b987e07b64b21f9d74eb85ccd5391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 23 Feb 2026 08:42:58 +0000 Subject: [PATCH 04/12] Do not split orig/trans if they are not different --- beetsplug/lyrics.py | 5 +++-- test/plugins/test_lyrics.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index c71ee24d0..4e732c22e 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -36,6 +36,7 @@ from unidecode import unidecode from beets import plugins, ui from beets.autotag.distance import string_dist +from beets.util import unique_list from beets.util.config import sanitize_choices from ._utils.requests import HTTPNotFoundError, RequestHandler @@ -819,8 +820,8 @@ class Translator(LyricsRequestHandler): timestamps = [ts for ts, _ in ts_and_text] text_pairs = self.get_translations([ln for _, ln in ts_and_text]) - # only add the separator for non-empty translations - texts = [" / ".join(filter(None, p)) for p in text_pairs] + # only add the separator for non-empty and differing translations + texts = [" / ".join(unique_list(filter(None, p))) for p in text_pairs] # only add the space between non-empty timestamps and texts return [" ".join(filter(None, p)) for p in zip(timestamps, texts)] diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 155a80d4b..167673638 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -639,7 +639,7 @@ class TestTranslation: [Refrain: Doja Cat] / [Refrain : Doja Cat] Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) My body wouldn't let me hide it (Hide it) / Mon corps ne me laissait pas le cacher (Cachez-le) - [Chorus] / [Chorus] + [Chorus] No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) / Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas) Ridin' through the thunder, lightnin' / Chevauchant à travers le tonnerre, la foudre""", # noqa: E501 id="plain", From 835115a6f7b54f41d6faffc8fd83df8f632d8bfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Wed, 25 Feb 2026 21:07:26 +0000 Subject: [PATCH 05/12] Fix genius end to end lyrics test --- beetsplug/_typing.py | 9 +++++++++ beetsplug/lyrics.py | 14 +++++++++++++- test/plugins/lyrics_pages.py | 23 +++++++++++------------ 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/beetsplug/_typing.py b/beetsplug/_typing.py index b772ffdd5..6c69aeed4 100644 --- a/beetsplug/_typing.py +++ b/beetsplug/_typing.py @@ -85,6 +85,15 @@ class GeniusAPI: class Search(TypedDict): response: GeniusAPI.SearchResponse + class StatusResponse(TypedDict): + status: int + message: str + + class Meta(TypedDict): + meta: GeniusAPI.StatusResponse + + Response = Search | Meta + class GoogleCustomSearchAPI: class Response(TypedDict): diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 4e732c22e..26c95c4e6 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -67,6 +67,10 @@ class CaptchaError(requests.exceptions.HTTPError): super().__init__("Captcha is required", *args, **kwargs) +class GeniusHTTPError(requests.exceptions.HTTPError): + pass + + # Utilities. @@ -564,8 +568,16 @@ class Genius(SearchBackend): def headers(self) -> dict[str, str]: return {"Authorization": f"Bearer {self.config['genius_api_key']}"} + def get_json(self, *args, **kwargs) -> GeniusAPI.Search: + response: GeniusAPI.Response = super().get_json(*args, **kwargs) + if "response" in response: + return response # type: ignore[return-value] + + meta = response["meta"] + raise GeniusHTTPError(f"{meta['message']} Status: {meta['status']}") + def search(self, artist: str, title: str) -> Iterable[SearchResult]: - search_data: GeniusAPI.Search = self.get_json( + search_data = self.get_json( self.SEARCH_URL, params={"q": f"{artist} {title}"}, headers=self.headers, diff --git a/test/plugins/lyrics_pages.py b/test/plugins/lyrics_pages.py index 047b6e443..ce2728b64 100644 --- a/test/plugins/lyrics_pages.py +++ b/test/plugins/lyrics_pages.py @@ -133,15 +133,13 @@ lyrics_pages = [ LyricsPage.make( "https://genius.com/The-beatles-lady-madonna-lyrics", """ - [Intro: Instrumental] - [Verse 1: Paul McCartney] Lady Madonna, children at your feet Wonder how you manage to make ends meet Who finds the money when you pay the rent? Did you think that money was heaven sent? - [Bridge: Paul McCartney] + [Bridge: Paul McCartney, Paul McCartney, John Lennon & George Harrison] Friday night arrives without a suitcase Sunday morning creeping like a nun Monday's child has learned to tie his bootlace @@ -150,27 +148,28 @@ lyrics_pages = [ [Verse 2: Paul McCartney] Lady Madonna, baby at your breast Wonders how you manage to feed the rest - - [Bridge: Paul McCartney, John Lennon & George Harrison] [Tenor Saxophone Solo: Ronnie Scott] + + [Bridge: John Lennon & George Harrison, Paul McCartney, John Lennon & George Harrison] + Pa-pa-pa-pa, pa-pa-pa-pa-pa + Pa-pa-pa-pa-pa, pa-pa-pa, pa-pa, pa-pa + Pa-pa-pa-pa, pa-pa-pa-pa-pa See how they run [Verse 3: Paul McCartney] Lady Madonna, lying on the bed Listen to the music playing in your head - [Bridge: Paul McCartney] - Tuesday afternoon is never ending - Wednesday morning papers didn't come - Thursday night your stockings needed mending + [Bridge: Paul McCartney, John Lennon & George Harrison, Paul McCartney, John Lennon & George Harrison] + Tuesday afternoon is never ending (Pa-pa-pa-pa, pa-pa-pa-pa-pa) + Wednesday morning, papers didn't come (Pa-pa-pa-pa-pa, pa-pa-pa, pa-pa, pa-pa) + Thursday night, your stockings needed mending (Pa-pa-pa-pa, pa-pa-pa-pa-pa) See how they run [Verse 4: Paul McCartney] Lady Madonna, children at your feet Wonder how you manage to make ends meet - - [Outro: Instrumental] - """, + """, # noqa: E501 marks=[xfail_on_ci("Genius returns 403 FORBIDDEN in CI")], ), LyricsPage.make( From 38708ae592d1c4cd72c88527fb2198e57606d8a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Tue, 3 Mar 2026 18:16:42 +0000 Subject: [PATCH 06/12] Refactor lyrics handling to use structured Lyrics object * Introduce a `Lyrics` dataclass to carry text, source URL, and language metadata through fetch, translation, and storage paths. * Return `Lyrics` from backends and plugin lookup methods instead of raw tuples/strings. * Store backend name in `lyrics_source` derived from fetched URL root domain. * Simplify translator flow to operate on `Lyrics`, reuse line splitting, append translations in-place, and record translation language metadata. --- beetsplug/lyrics.py | 233 ++++++++++++++++++++++------------- docs/changelog.rst | 4 + docs/plugins/lyrics.rst | 14 ++- test/plugins/lyrics_pages.py | 5 +- test/plugins/test_lyrics.py | 151 +++++++++++++++++------ 5 files changed, 278 insertions(+), 129 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 26c95c4e6..e1e6e4669 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -21,7 +21,7 @@ import math import re import textwrap from contextlib import contextmanager, suppress -from dataclasses import dataclass +from dataclasses import dataclass, field from functools import cached_property, partial, total_ordering from html import unescape from itertools import groupby @@ -36,6 +36,7 @@ from unidecode import unidecode from beets import plugins, ui from beets.autotag.distance import string_dist +from beets.dbcore import types from beets.util import unique_list from beets.util.config import sanitize_choices @@ -59,7 +60,6 @@ if TYPE_CHECKING: ) INSTRUMENTAL_LYRICS = "[Instrumental]" -SYNCED_LYRICS_PAT = re.compile(r"\[\d\d:\d\d.\d\d\]") class CaptchaError(requests.exceptions.HTTPError): @@ -231,6 +231,84 @@ class LyricsRequestHandler(RequestHandler): self.warn("Request error: {}", exc) +@dataclass +class Lyrics: + TRANSLATION_PAT = re.compile(r" / [^\n]+") + LINE_PARTS_PAT = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + + text: str + backend: str + url: str | None = None + language: str | None = None + translation_language: str | None = None + translations: list[str] = field(default_factory=list) + + def __post_init__(self) -> None: + if not self.language and self.text: + with suppress(langdetect.LangDetectException): + self.language = langdetect.detect(self.original_text).upper() + + @classmethod + def from_item(cls, item: Item) -> Lyrics: + data = {"text": item.lyrics} + for key in ("backend", "url", "language", "translation_language"): + data[key] = item.get(f"lyrics_{key}", with_album=False) + + return cls(**data) + + @cached_property + def original_text(self) -> str: + """Return the original text without translations.""" + # Remove translations from the lyrics text. + return self.TRANSLATION_PAT.sub("", self.text).strip() + + @cached_property + def _split_lines(self) -> list[tuple[str, str]]: + """Append translations to the given lyrics texts. + + Lines may contain timestamps from LRCLib which need to be temporarily + removed for the translation. They can take any of these forms: + - empty + Text - text only + [00:00.00] - timestamp only + [00:00.00] Text - timestamp with text + """ + return [ + (m[1], m[2]) if (m := self.LINE_PARTS_PAT.match(line)) else ("", "") + for line in self.text.splitlines() + ] + + @cached_property + def timestamps(self) -> list[str]: + return [ts for ts, _ in self._split_lines] + + @cached_property + def text_lines(self) -> list[str]: + return [ln for _, ln in self._split_lines] + + @property + def synced(self) -> bool: + return any(self.timestamps) + + @property + def translated(self) -> bool: + return bool(self.translation_language) + + @property + def full_text(self) -> str: + if not self.translations: + return self.text + + text_pairs = list(zip(self.text_lines, self.translations)) + + # only add the separator for non-empty and differing translations + texts = [" / ".join(unique_list(filter(None, p))) for p in text_pairs] + # only add the space between non-empty timestamps and texts + return "\n".join( + " ".join(filter(None, p)) for p in zip(self.timestamps, texts) + ) + + class BackendClass(type): @property def name(cls) -> str: @@ -247,7 +325,7 @@ class Backend(LyricsRequestHandler, metaclass=BackendClass): def fetch( self, artist: str, title: str, album: str, length: int - ) -> tuple[str, str] | None: + ) -> Lyrics | None: raise NotImplementedError @@ -273,7 +351,7 @@ class LRCLyrics: cls, duration: float, lyrics: str | None ) -> str | None: if lyrics and ( - m := Translator.LINE_PARTS_RE.match(lyrics.splitlines()[-1]) + m := Lyrics.LINE_PARTS_PAT.match(lyrics.splitlines()[-1]) ): ts, _ = m.groups() if ts: @@ -375,7 +453,7 @@ class LRCLib(Backend): def fetch( self, artist: str, title: str, album: str, length: int - ) -> tuple[str, str] | None: + ) -> Lyrics | None: """Fetch lyrics text for the given song data.""" evaluate_item = partial(LRCLyrics.make, target_duration=length) @@ -383,7 +461,9 @@ class LRCLib(Backend): candidates = [evaluate_item(item) for item in group] if item := self.pick_best_match(candidates): lyrics = item.get_text(self.config["synced"].get(bool)) - return lyrics, f"{self.GET_URL}/{item.id}" + return Lyrics( + lyrics, self.__class__.name, f"{self.GET_URL}/{item.id}" + ) return None @@ -411,7 +491,7 @@ class MusiXmatch(Backend): def build_url(cls, *args: str) -> str: return cls.URL_TEMPLATE.format(*map(cls.encode, args)) - def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None: + def fetch(self, artist: str, title: str, *_) -> Lyrics | None: url = self.build_url(artist, title) html = self.get_text(url) @@ -433,7 +513,7 @@ class MusiXmatch(Backend): # sometimes there are non-existent lyrics with some content if "Lyrics | Musixmatch" in lyrics: return None - return lyrics, url + return Lyrics(lyrics, self.__class__.name, url) class Html: @@ -536,13 +616,13 @@ class SearchBackend(SoupMixin, Backend): if check_match(candidate): yield candidate - def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None: + def fetch(self, artist: str, title: str, *_) -> Lyrics | None: """Fetch lyrics for the given artist and title.""" for result in self.get_results(artist, title): if (html := self.get_text(result.url)) and ( lyrics := self.scrape(html) ): - return lyrics, result.url + return Lyrics(lyrics, self.__class__.name, result.url) return None @@ -763,11 +843,7 @@ class Google(SearchBackend): @dataclass class Translator(LyricsRequestHandler): TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" - LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") SEPARATOR = " | " - remove_translations = staticmethod( - partial(re.compile(r" / [^\n]+").sub, "") - ) _log: Logger api_key: str @@ -789,7 +865,7 @@ class Translator(LyricsRequestHandler): [x.upper() for x in from_languages or []], ) - def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]: + def get_translations(self, texts: Iterable[str]) -> list[str]: """Return translations for the given texts. To reduce the translation 'cost', we translate unique texts, and then @@ -807,37 +883,9 @@ class Translator(LyricsRequestHandler): translated_text = data[0]["translations"][0]["text"] translations = translated_text.split(self.SEPARATOR) trans_by_text = dict(zip(unique_texts, translations)) - return list(zip(texts, (trans_by_text.get(t, "") for t in texts))) + return [trans_by_text.get(t, "") for t in texts] - @classmethod - def split_line(cls, line: str) -> tuple[str, str]: - """Split line to (timestamp, text).""" - if m := cls.LINE_PARTS_RE.match(line): - return m[1], m[2] - - return "", "" - - def append_translations(self, lines: Iterable[str]) -> list[str]: - """Append translations to the given lyrics texts. - - Lines may contain timestamps from LRCLib which need to be temporarily - removed for the translation. They can take any of these forms: - - empty - Text - text only - [00:00:00] - timestamp only - [00:00:00] Text - timestamp with text - """ - # split into [(timestamp, text), ...]] - ts_and_text = list(map(self.split_line, lines)) - timestamps = [ts for ts, _ in ts_and_text] - text_pairs = self.get_translations([ln for _, ln in ts_and_text]) - - # only add the separator for non-empty and differing translations - texts = [" / ".join(unique_list(filter(None, p))) for p in text_pairs] - # only add the space between non-empty timestamps and texts - return [" ".join(filter(None, p)) for p in zip(timestamps, texts)] - - def translate(self, new_lyrics: str, old_lyrics: str) -> str: + def translate(self, lyrics: Lyrics, old_lyrics: Lyrics) -> Lyrics: """Translate the given lyrics to the target language. Check old lyrics for existing translations and return them if their @@ -846,38 +894,33 @@ class Translator(LyricsRequestHandler): If the lyrics are already in the target language or not in any of of the source languages (if configured), they are returned as is. - - The footer with the source URL is preserved, if present. """ if ( - " / " in old_lyrics - and self.remove_translations(old_lyrics) == new_lyrics - ): + lyrics.original_text + ) == old_lyrics.original_text and old_lyrics.translated: self.info("🔵 Translations already exist") return old_lyrics - lyrics_language = langdetect.detect(new_lyrics).upper() - if lyrics_language == self.to_language: + if (lyrics_language := lyrics.language) == self.to_language: self.info( "🔵 Lyrics are already in the target language {.to_language}", self, ) - return new_lyrics - - if self.from_languages and lyrics_language not in self.from_languages: + elif ( + from_lang_config := self.from_languages + ) and lyrics_language not in from_lang_config: self.info( - "🔵 Configuration {.from_languages} does not permit translating" - " from {}", - self, + "🔵 Configuration {} does not permit translating from {}", + from_lang_config, lyrics_language, ) - return new_lyrics + else: + with self.handle_request(): + lyrics.translations = self.get_translations(lyrics.text_lines) + lyrics.translation_language = self.to_language + self.info("🟢 Translated lyrics to {.to_language}", self) - lyrics, *url = new_lyrics.split("\n\nSource: ") - with self.handle_request(): - translated_lines = self.append_translations(lyrics.splitlines()) - self.info("🟢 Translated lyrics to {.to_language}", self) - return "\n\nSource: ".join(["\n".join(translated_lines), *url]) + return lyrics @dataclass @@ -975,6 +1018,12 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin): BACKEND_BY_NAME: ClassVar[dict[str, type[Backend]]] = { b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch] } + item_types: ClassVar[dict[str, types.Type]] = { + "lyrics_url": types.STRING, + "lyrics_backend": types.STRING, + "lyrics_language": types.STRING, + "lyrics_translation_language": types.STRING, + } @cached_property def backends(self) -> list[Backend]: @@ -1089,18 +1138,15 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin): for item in task.imported_items(): self.add_item_lyrics(item, False) - def find_lyrics(self, item: Item) -> str: + def find_lyrics(self, item: Item) -> Lyrics | None: album, length = item.album, round(item.length) matches = ( - [ - lyrics - for t in titles - if (lyrics := self.get_lyrics(a, t, album, length)) - ] + self.get_lyrics(a, t, album, length) for a, titles in search_pairs(item) + for t in titles ) - return "\n\n---\n\n".join(next(filter(None, matches), [])) + return next(filter(None, matches), None) def add_item_lyrics(self, item: Item, write: bool) -> None: """Fetch and store lyrics for a single item. If ``write``, then the @@ -1113,35 +1159,44 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin): self.info("🔵 Lyrics already present: {}", item) return - if lyrics := self.find_lyrics(item): + existing_lyrics = Lyrics.from_item(item) + if new_lyrics := self.find_lyrics(item): self.info("🟢 Found lyrics: {}", item) if translator := self.translator: - lyrics = translator.translate(lyrics, item.lyrics) + new_lyrics = translator.translate(new_lyrics, existing_lyrics) + + synced_mode = self.config["synced"].get(bool) + if synced_mode and existing_lyrics.synced and not new_lyrics.synced: + self.info( + "🔴 Not updating synced lyrics with non-synced ones: {}", + item, + ) + return + + for key in ("backend", "url", "language", "translation_language"): + item_key = f"lyrics_{key}" + if value := getattr(new_lyrics, key): + item[item_key] = value + elif item_key in item: + del item[item_key] + + lyrics_text = new_lyrics.full_text else: self.info("🔴 Lyrics not found: {}", item) - lyrics = self.config["fallback"].get() + lyrics_text = self.config["fallback"].get() - has_new_lyrics = lyrics not in {None, item.lyrics} - synced_mode = self.config["synced"].get(bool) - existing_synced = bool(SYNCED_LYRICS_PAT.search(item.lyrics)) - new_synced = bool(SYNCED_LYRICS_PAT.search(lyrics or "")) - if has_new_lyrics and not ( - synced_mode and existing_synced and not new_synced - ): - item.lyrics = lyrics + if lyrics_text not in {None, item.lyrics}: + item.lyrics = lyrics_text + item.store() if write: item.try_write() - item.store() - def get_lyrics(self, artist: str, title: str, *args) -> str | None: - """Fetch lyrics, trying each source in turn. Return a string or - None if no lyrics were found. - """ + def get_lyrics(self, artist: str, title: str, *args) -> Lyrics | None: + """Get first found lyrics, trying each source in turn.""" self.info("Fetching lyrics for {} - {}", artist, title) for backend in self.backends: with backend.handle_request(): if lyrics_info := backend.fetch(artist, title, *args): - lyrics, url = lyrics_info - return f"{lyrics}\n\nSource: {url}" + return lyrics_info return None diff --git a/docs/changelog.rst b/docs/changelog.rst index 3c3fb0d51..de5683952 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -40,6 +40,10 @@ New features - :doc:`plugins/lyrics`: With ``synced`` enabled, existing synced lyrics are no longer replaced by newly fetched plain lyrics, even when ``force`` is enabled. +- :doc:`plugins/lyrics`: Remove ``Source: `` suffix from lyrics. + Store the backend name in ``lyrics_backend``, URL in ``lyrics_url``, language + in ``lyrics_language`` and translation language (if enabled) in + ``lyrics_translation_language`` flexible attributes. :bug:`6370` Bug fixes ~~~~~~~~~ diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 3ebd9fc17..83e24a645 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -25,9 +25,17 @@ Fetch Lyrics During Import -------------------------- When importing new files, beets will now fetch lyrics for files that don't -already have them. The lyrics will be stored in the beets database. If the -``import.write`` config option is on, then the lyrics will also be written to -the files' tags. +already have them. The lyrics will be stored in the beets database. The plugin +also sets a few useful flexible attributes: + +- ``lyrics_backend``: name of the backend that provided the lyrics +- ``lyrics_url``: URL of the page where the lyrics were found +- ``lyrics_language``: original language of the lyrics +- ``lyrics_translation_language``: language of the lyrics translation (if + translation is enabled) + +If the ``import.write`` config option is on, then the lyrics will also be +written to the files' tags. Configuration ------------- diff --git a/test/plugins/lyrics_pages.py b/test/plugins/lyrics_pages.py index ce2728b64..1df53f731 100644 --- a/test/plugins/lyrics_pages.py +++ b/test/plugins/lyrics_pages.py @@ -23,6 +23,7 @@ class LyricsPage(NamedTuple): lyrics: str artist: str = "The Beatles" track_title: str = "Lady Madonna" + language: str = "EN" url_title: str | None = None # only relevant to the Google backend marks: list[str] = [] # markers for pytest.param # noqa: RUF012 @@ -127,6 +128,7 @@ lyrics_pages = [ """, artist="Atlanta", track_title="Mergaitės Nori Mylėt", + language="LT", url_title="Mergaitės nori mylėt – Atlanta | Dainų Žodžiai", marks=[xfail_on_ci("Expired SSL certificate")], ), @@ -221,6 +223,7 @@ lyrics_pages = [ Je me demande comment vous vous débrouillez pour joindre les deux bouts """, url_title="Paroles et traduction The Beatles : Lady Madonna - paroles de chanson", # noqa: E501 + language="FR", ), LyricsPage.make( # note that this URL needs to be followed with a slash, otherwise it @@ -268,7 +271,7 @@ lyrics_pages = [ url_title="Lady Madonna - The Beatles - LETRAS.MUS.BR", ), LyricsPage.make( - "https://lrclib.net/api/get/14038", + "https://lrclib.net/api/get/19648857", """ [00:08.35] Lady Madonna, children at your feet [00:12.85] Wonder how you manage to make ends meet diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 167673638..f2321391d 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -23,10 +23,12 @@ from http import HTTPStatus from typing import TYPE_CHECKING import pytest +import requests from beets.library import Item from beets.test.helper import PluginMixin, TestHelper from beetsplug import lyrics +from beetsplug.lyrics import Lyrics from .lyrics_pages import lyrics_pages @@ -251,12 +253,48 @@ class TestLyricsPlugin(LyricsPluginMixin): @pytest.mark.parametrize( "plugin_config, old_lyrics, found, expected", [ - ({}, "old", "new", "old"), - ({"force": True}, "old", "new", "new"), - ({"force": True, "local": True}, "old", "new", "old"), - ({"force": True, "fallback": None}, "old", "", "old"), - ({"force": True, "fallback": ""}, "old", "", ""), - ({"force": True, "fallback": "default"}, "old", "", "default"), + pytest.param( + {}, + "old", + "new", + "old", + id="no_force_keeps_old", + ), + pytest.param( + {"force": True}, + "old", + "new", + "new", + id="force_overwrites_with_new", + ), + pytest.param( + {"force": True, "local": True}, + "old", + "new", + "old", + id="force_local_keeps_old", + ), + pytest.param( + {"force": True, "fallback": None}, + "old", + None, + "old", + id="force_fallback_none_keeps_old", + ), + pytest.param( + {"force": True, "fallback": ""}, + "old", + None, + "", + id="force_fallback_empty_uses_empty", + ), + pytest.param( + {"force": True, "fallback": "default"}, + "old", + None, + "default", + id="force_fallback_default_uses_default", + ), pytest.param( {"force": True, "synced": True}, "[00:00.00] old synced", @@ -289,13 +327,41 @@ class TestLyricsPlugin(LyricsPluginMixin): found, expected, ): - monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: found) + monkeypatch.setattr( + lyrics_plugin, + "find_lyrics", + lambda _: Lyrics(found, "") if found is not None else None, + ) item = helper.create_item(id=1, lyrics=old_lyrics) lyrics_plugin.add_item_lyrics(item, False) assert item.lyrics == expected + def test_set_additional_lyrics_info( + self, monkeypatch, helper, lyrics_plugin + ): + lyrics = Lyrics( + "sing in the rain every hour of the day", + "lrclib", + url="https://lrclib.net/api/1", + ) + monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: lyrics) + item = helper.add_item( + id=1, lyrics="", lyrics_translation_language="EN" + ) + + lyrics_plugin.add_item_lyrics(item, False) + + item = helper.lib.get_item(item.id) + + assert item.lyrics_url == lyrics.url + assert item.lyrics_backend == lyrics.backend + assert item.lyrics_language == "EN" + # make sure translation language is cleared + with pytest.raises(AttributeError): + item.lyrics_translation_language + class LyricsBackendTest(LyricsPluginMixin): @pytest.fixture @@ -343,21 +409,29 @@ class TestLyricsSources(LyricsBackendTest): } requests_mock.get(lyrics.Google.SEARCH_URL, json=data) - def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage): + def test_backend_source( + self, monkeypatch, lyrics_plugin, lyrics_page: LyricsPage + ): """Test parsed lyrics from each of the configured lyrics pages.""" - lyrics_info = lyrics_plugin.find_lyrics( + monkeypatch.setattr( + "beetsplug.lyrics.LyricsRequestHandler.create_session", + lambda _: requests.Session(), + ) + + assert lyrics_plugin.find_lyrics( Item( artist=lyrics_page.artist, title=lyrics_page.track_title, album="", length=186.0, ) + ) == Lyrics( + lyrics_page.lyrics, + lyrics_page.backend, + url=lyrics_page.url, + language=lyrics_page.language, ) - assert lyrics_info - lyrics, _ = lyrics_info.split("\n\nSource: ") - assert lyrics == lyrics_page.lyrics - class TestGoogleLyrics(LyricsBackendTest): """Test scraping heuristics on a fake html page.""" @@ -499,14 +573,19 @@ class TestLRCLibLyrics(LyricsBackendTest): @pytest.mark.parametrize("response_data", [[lyrics_match()]]) @pytest.mark.parametrize( "plugin_config, expected_lyrics", - [({"synced": True}, SYNCED), ({"synced": False}, "plain")], + [ + pytest.param({"synced": True}, SYNCED, id="pick-synced"), + pytest.param({"synced": False}, "plain", id="pick-plain"), + ], ) - def test_synced_config_option(self, fetch_lyrics, expected_lyrics): - lyrics_info = fetch_lyrics() - assert lyrics_info + def test_synced_config_option( + self, backend_name, fetch_lyrics, expected_lyrics + ): + lyrics = fetch_lyrics() - lyrics, _ = lyrics_info - assert lyrics == expected_lyrics + assert lyrics + assert lyrics.text == expected_lyrics + assert lyrics.backend == backend_name @pytest.mark.parametrize( "response_data, expected_lyrics", @@ -577,14 +656,12 @@ class TestLRCLibLyrics(LyricsBackendTest): ) @pytest.mark.parametrize("plugin_config", [{"synced": True}]) def test_fetch_lyrics(self, fetch_lyrics, expected_lyrics): - lyrics_info = fetch_lyrics() + lyrics = fetch_lyrics() if expected_lyrics is None: - assert not lyrics_info + assert not lyrics else: - assert lyrics_info - lyrics, _ = fetch_lyrics() - - assert lyrics == expected_lyrics + assert lyrics + assert lyrics.text == expected_lyrics class TestTranslation: @@ -634,7 +711,7 @@ class TestTranslation: [Chorus] No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) Ridin' through the thunder, lightnin'""", - "", + Lyrics("", ""), """ [Refrain: Doja Cat] / [Refrain : Doja Cat] Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) @@ -649,26 +726,28 @@ class TestTranslation: [00:00.00] Some synced lyrics [00:00:50] [00:01.00] Some more synced lyrics - - Source: https://lrclib.net/api/123""", - "", + """, + Lyrics("", ""), """ [00:00.00] Some synced lyrics / Quelques paroles synchronisées [00:00:50] - [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées - - Source: https://lrclib.net/api/123""", + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées""", # noqa: E501 id="synced", ), pytest.param( "Quelques paroles", - "", + Lyrics("", ""), "Quelques paroles", id="already in the target language", ), pytest.param( "Some lyrics", - "Some lyrics / Some translation", + Lyrics( + "Some lyrics / Some translation", + "", + language="EN", + translation_language="FR", + ), "Some lyrics / Some translation", id="already translated", ), @@ -679,8 +758,8 @@ class TestTranslation: bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"]) assert bing.translate( - textwrap.dedent(new_lyrics), old_lyrics - ) == textwrap.dedent(expected) + Lyrics(textwrap.dedent(new_lyrics), ""), old_lyrics + ).full_text == textwrap.dedent(expected) class TestRestFiles: From 7df14e18779f5b910af7a895862807668dd155c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Tue, 3 Mar 2026 18:19:17 +0000 Subject: [PATCH 07/12] Fix timestamp format in translation/synced lyrics test --- beetsplug/lyrics.py | 2 +- test/plugins/test_lyrics.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index e1e6e4669..a4baf25e7 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -234,7 +234,7 @@ class LyricsRequestHandler(RequestHandler): @dataclass class Lyrics: TRANSLATION_PAT = re.compile(r" / [^\n]+") - LINE_PARTS_PAT = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + LINE_PARTS_PAT = re.compile(r"^(\[\d\d:\d\d\.\d\d\]|) *(.*)$") text: str backend: str diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index f2321391d..560f9101f 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -724,13 +724,13 @@ class TestTranslation: pytest.param( """ [00:00.00] Some synced lyrics - [00:00:50] + [00:00.50] [00:01.00] Some more synced lyrics """, Lyrics("", ""), """ [00:00.00] Some synced lyrics / Quelques paroles synchronisées - [00:00:50] + [00:00.50] [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées""", # noqa: E501 id="synced", ), From 7d30efa82c2197c44ad2c40016241dabeaee9f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Thu, 5 Mar 2026 15:49:15 +0000 Subject: [PATCH 08/12] Migrate lyrics metadata to flex fields on library open - Add `LyricsMetadataInFlexFieldsMigration` to extract legacy source URLs and language metadata from lyrics text into flex attributes - Add `Lyrics.from_legacy_text` to parse legacy lyrics format - Move `with_row_factory` context manager up to base `Migration` class - Rename `migrate_table` to `migrate_model` and pass model class instead of table name string. This is so that the migration can access both `_table` and `_flex_table` attributes. - Make `langdetect` import optional in `Lyrics.__post_init__`: users may not have have the dependency installed, and we do not want the migration to fail because of that. - Move `BACKEND_BY_NAME` to module level for use outside plugin class --- beets/dbcore/db.py | 26 ++++++-- beets/library/library.py | 7 ++- beets/library/migrations.py | 107 ++++++++++++++++++++++++++++---- beetsplug/lyrics.py | 56 ++++++++++++++--- docs/changelog.rst | 5 +- test/library/test_migrations.py | 68 +++++++++++++++++++- test/plugins/test_lyrics.py | 45 ++++++++++++-- 7 files changed, 272 insertions(+), 42 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 30f7ef7cf..13c995829 100755 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -1064,15 +1064,28 @@ class Migration(ABC): name = cls.__name__.removesuffix("Migration") # type: ignore[attr-defined] return re.sub(r"(?<=[a-z])(?=[A-Z])", "_", name).lower() - def migrate_table(self, table: str, *args, **kwargs) -> None: + @contextmanager + def with_row_factory(self, factory: type[NamedTuple]) -> Iterator[None]: + """Temporarily set the row factory to a specific type.""" + original_factory = self.db._connection().row_factory + self.db._connection().row_factory = lambda _, row: factory(*row) + try: + yield + finally: + self.db._connection().row_factory = original_factory + + def migrate_model(self, model_cls: type[Model], *args, **kwargs) -> None: """Migrate a specific table.""" + table = model_cls._table if not self.db.migration_exists(self.name, table): - self._migrate_data(table, *args, **kwargs) + self._migrate_data(model_cls, *args, **kwargs) self.db.record_migration(self.name, table) @abstractmethod - def _migrate_data(self, table: str, current_fields: set[str]) -> None: - """Migrate data for a specific table.""" + def _migrate_data( + self, model_cls: type[Model], current_fields: set[str] + ) -> None: + """Migrate data for a specific model.""" class TableInfo(TypedDict): @@ -1375,8 +1388,9 @@ class Database: for migration_cls, model_classes in self._migrations: migration = migration_cls(self) for model_cls in model_classes: - table = model_cls._table - migration.migrate_table(table, self.db_tables[table]["columns"]) + migration.migrate_model( + model_cls, self.db_tables[model_cls._table]["columns"] + ) def migration_exists(self, name: str, table: str) -> bool: """Return whether a named migration has been marked complete.""" diff --git a/beets/library/library.py b/beets/library/library.py index b161b7399..823c62a6b 100644 --- a/beets/library/library.py +++ b/beets/library/library.py @@ -8,7 +8,7 @@ import beets from beets import dbcore from beets.util import normpath -from .migrations import MultiGenreFieldMigration +from . import migrations from .models import Album, Item from .queries import PF_KEY_DEFAULT, parse_query_parts, parse_query_string @@ -20,7 +20,10 @@ class Library(dbcore.Database): """A database of music containing songs and albums.""" _models = (Item, Album) - _migrations = ((MultiGenreFieldMigration, (Item, Album)),) + _migrations = ( + (migrations.MultiGenreFieldMigration, (Item, Album)), + (migrations.LyricsMetadataInFlexFieldsMigration, (Item,)), + ) def __init__( self, diff --git a/beets/library/migrations.py b/beets/library/migrations.py index c061ddfc5..0dd72abfe 100644 --- a/beets/library/migrations.py +++ b/beets/library/migrations.py @@ -1,6 +1,6 @@ from __future__ import annotations -from contextlib import contextmanager, suppress +from contextlib import suppress from functools import cached_property from typing import TYPE_CHECKING, NamedTuple, TypeVar @@ -15,6 +15,8 @@ from beets.util import unique_list if TYPE_CHECKING: from collections.abc import Iterator + from beets.dbcore.db import Model + T = TypeVar("T") @@ -40,16 +42,6 @@ class MultiGenreFieldMigration(Migration): separators.extend(["; ", ", ", " / "]) return unique_list(filter(None, separators)) - @contextmanager - def with_factory(self, factory: type[NamedTuple]) -> Iterator[None]: - """Temporarily set the row factory to a specific type.""" - original_factory = self.db._connection().row_factory - self.db._connection().row_factory = lambda _, row: factory(*row) - try: - yield - finally: - self.db._connection().row_factory = original_factory - def get_genres(self, genre: str) -> str: for separator in self.separators: if separator in genre: @@ -57,13 +49,17 @@ class MultiGenreFieldMigration(Migration): return genre - def _migrate_data(self, table: str, current_fields: set[str]) -> None: + def _migrate_data( + self, model_cls: type[Model], current_fields: set[str] + ) -> None: """Migrate legacy genre values to the multi-value genres field.""" if "genre" not in current_fields: # No legacy genre field, so nothing to migrate. return - with self.db.transaction() as tx, self.with_factory(GenreRow): + table = model_cls._table + + with self.db.transaction() as tx, self.with_row_factory(GenreRow): rows: list[GenreRow] = tx.query( # type: ignore[assignment] f""" SELECT id, genre, genres @@ -95,3 +91,88 @@ class MultiGenreFieldMigration(Migration): ) ui.print_(f"Migration complete: {migrated} of {total} {table} updated") + + +class LyricsRow(NamedTuple): + id: int + lyrics: str + + +class LyricsMetadataInFlexFieldsMigration(Migration): + def _migrate_data(self, model_cls: type[Model], _: set[str]) -> None: + """Migrate legacy lyrics to move metadata to flex attributes.""" + table = model_cls._table + flex_table = model_cls._flex_table + + with self.db.transaction() as tx: + migrated_ids = { + r[0] + for r in tx.query( + f""" + SELECT entity_id + FROM {flex_table} + WHERE key == 'lyrics_backend' + """ + ) + } + with self.db.transaction() as tx, self.with_row_factory(LyricsRow): + rows: list[LyricsRow] = tx.query( # type: ignore[assignment] + f""" + SELECT id, lyrics + FROM {table} + WHERE lyrics IS NOT NULL AND lyrics != '' + """ + ) + + total = len(rows) + to_migrate = [r for r in rows if r.id not in migrated_ids] + if not to_migrate: + return + + from beetsplug.lyrics import Lyrics + + migrated = total - len(to_migrate) + + ui.print_(f"Migrating lyrics for {total} {table}...") + lyr_fields = ["backend", "url", "language", "translation_language"] + for batch in chunks(to_migrate, 100): + lyrics_batch = [Lyrics.from_legacy_text(r.lyrics) for r in batch] + ids_with_lyrics = [ + (lyr, r.id) for lyr, r in zip(lyrics_batch, batch) + ] + with self.db.transaction() as tx: + update_rows = [ + (lyr.full_text, r.id) + for lyr, r in zip(lyrics_batch, batch) + if lyr.full_text != r.lyrics + ] + if update_rows: + tx.mutate_many( + f"UPDATE {table} SET lyrics = ? WHERE id = ?", + update_rows, + ) + + # Only insert flex rows for non-null metadata values + flex_rows = [ + (_id, f"lyrics_{field}", val) + for lyr, _id in ids_with_lyrics + for field in lyr_fields + if (val := getattr(lyr, field)) is not None + ] + if flex_rows: + tx.mutate_many( + f""" + INSERT INTO {flex_table} (entity_id, key, value) + VALUES (?, ?, ?) + """, + flex_rows, + ) + + migrated += len(batch) + + ui.print_( + f" Migrated {migrated} {table} " + f"({migrated}/{total} processed)..." + ) + + ui.print_(f"Migration complete: {migrated} of {total} {table} updated") diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index a4baf25e7..e89f99d0e 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -26,10 +26,9 @@ from functools import cached_property, partial, total_ordering from html import unescape from itertools import groupby from pathlib import Path -from typing import TYPE_CHECKING, ClassVar, NamedTuple +from typing import TYPE_CHECKING, Any, ClassVar, NamedTuple from urllib.parse import quote, quote_plus, urlencode, urlparse -import langdetect import requests from bs4 import BeautifulSoup from unidecode import unidecode @@ -233,21 +232,56 @@ class LyricsRequestHandler(RequestHandler): @dataclass class Lyrics: + ORIGINAL_PAT = re.compile(r"[^\n]+ / ") TRANSLATION_PAT = re.compile(r" / [^\n]+") LINE_PARTS_PAT = re.compile(r"^(\[\d\d:\d\d\.\d\d\]|) *(.*)$") text: str - backend: str + backend: str | None = None url: str | None = None language: str | None = None translation_language: str | None = None translations: list[str] = field(default_factory=list) def __post_init__(self) -> None: - if not self.language and self.text: + try: + import langdetect + except ImportError: + return + + if not self.text or self.text == INSTRUMENTAL_LYRICS: + return + + if not self.language: with suppress(langdetect.LangDetectException): self.language = langdetect.detect(self.original_text).upper() + if not self.translation_language: + all_lines = self.text.splitlines() + lines_with_delimiter_count = sum( + 1 for ln in all_lines if " / " in ln + ) + if lines_with_delimiter_count >= len(all_lines) / 2: + # we are confident we are dealing with translations + with suppress(langdetect.LangDetectException): + self.translation_language = langdetect.detect( + self.ORIGINAL_PAT.sub("", self.text) + ).upper() + + @classmethod + def from_legacy_text(cls, text: str) -> Lyrics: + data: dict[str, Any] = {} + data["text"], *suffix = text.split("\n\nSource: ") + if suffix: + url = suffix[0].strip() + url_root = urlparse(url).netloc.removeprefix("www.").split(".")[0] + data.update( + url=url, + backend=url_root if url_root in BACKEND_BY_NAME else "google", + ) + + return cls(**data) + @classmethod def from_item(cls, item: Item) -> Lyrics: data = {"text": item.lyrics} @@ -1014,10 +1048,12 @@ class RestFiles: ui.print_(textwrap.dedent(text)) +BACKEND_BY_NAME = { + b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch] +} + + class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin): - BACKEND_BY_NAME: ClassVar[dict[str, type[Backend]]] = { - b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch] - } item_types: ClassVar[dict[str, types.Type]] = { "lyrics_url": types.STRING, "lyrics_backend": types.STRING, @@ -1029,12 +1065,12 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin): def backends(self) -> list[Backend]: user_sources = self.config["sources"].as_str_seq() - chosen = sanitize_choices(user_sources, self.BACKEND_BY_NAME) + chosen = sanitize_choices(user_sources, BACKEND_BY_NAME) if "google" in chosen and not self.config["google_API_key"].get(): self.warn("Disabling Google source: no API key configured.") chosen.remove("google") - return [self.BACKEND_BY_NAME[c](self.config, self._log) for c in chosen] + return [BACKEND_BY_NAME[c](self.config, self._log) for c in chosen] @cached_property def translator(self) -> Translator | None: @@ -1069,7 +1105,7 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin): # currently block requests with the beets user agent. "sources": [ n - for n in self.BACKEND_BY_NAME + for n in BACKEND_BY_NAME if n not in {"musixmatch", "tekstowo"} ], } diff --git a/docs/changelog.rst b/docs/changelog.rst index de5683952..35bc1ab0c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -42,8 +42,9 @@ New features longer replaced by newly fetched plain lyrics, even when ``force`` is enabled. - :doc:`plugins/lyrics`: Remove ``Source: `` suffix from lyrics. Store the backend name in ``lyrics_backend``, URL in ``lyrics_url``, language - in ``lyrics_language`` and translation language (if enabled) in - ``lyrics_translation_language`` flexible attributes. :bug:`6370` + in ``lyrics_language`` and translation language (if translations present) in + ``lyrics_translation_language`` flexible attributes. Lyrics are automatically + migrated on the first beets run. :bug:`6370` Bug fixes ~~~~~~~~~ diff --git a/test/library/test_migrations.py b/test/library/test_migrations.py index 2c0dece8b..4d72c3e9d 100644 --- a/test/library/test_migrations.py +++ b/test/library/test_migrations.py @@ -1,7 +1,9 @@ +import textwrap + import pytest from beets.dbcore import types -from beets.library.migrations import MultiGenreFieldMigration +from beets.library import migrations from beets.library.models import Album, Item from beets.test.helper import TestHelper @@ -30,13 +32,13 @@ class TestMultiGenreFieldMigration: # and now configure the migrations to be tested monkeypatch.setattr( "beets.library.library.Library._migrations", - ((MultiGenreFieldMigration, (Item, Album)),), + ((migrations.MultiGenreFieldMigration, (Item, Album)),), ) yield helper helper.teardown_beets() - def test_migrates_only_rows_with_missing_genres(self, helper: TestHelper): + def test_migrate(self, helper: TestHelper): helper.config["lastgenre"]["separator"] = " - " expected_item_genres = [] @@ -70,3 +72,63 @@ class TestMultiGenreFieldMigration: del helper.lib.db_tables assert helper.lib.migration_exists("multi_genre_field", "items") assert helper.lib.migration_exists("multi_genre_field", "albums") + + +class TestLyricsMetadataInFlexFieldsMigration: + @pytest.fixture + def helper(self, monkeypatch): + # do not apply migrations upon library initialization + monkeypatch.setattr("beets.library.library.Library._migrations", ()) + + helper = TestHelper() + helper.setup_beets() + + # and now configure the migrations to be tested + monkeypatch.setattr( + "beets.library.library.Library._migrations", + ((migrations.LyricsMetadataInFlexFieldsMigration, (Item,)),), + ) + yield helper + + helper.teardown_beets() + + def test_migrate(self, helper: TestHelper): + lyrics_item = helper.add_item( + lyrics=textwrap.dedent(""" + [00:00.00] Some synced lyrics / Quelques paroles synchronisées + [00:00.50] + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées + + Source: https://lrclib.net/api/1/""") + ) + instrumental_lyrics_item = helper.add_item(lyrics="[Instrumental]") + + helper.lib._migrate() + + lyrics_item.load() + + assert lyrics_item.lyrics == textwrap.dedent( + """ + [00:00.00] Some synced lyrics / Quelques paroles synchronisées + [00:00.50] + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées""" + ) + assert lyrics_item.lyrics_backend == "lrclib" + assert lyrics_item.lyrics_url == "https://lrclib.net/api/1/" + assert lyrics_item.lyrics_language == "EN" + assert lyrics_item.lyrics_translation_language == "FR" + + with pytest.raises(AttributeError): + instrumental_lyrics_item.lyrics_backend + with pytest.raises(AttributeError): + instrumental_lyrics_item.lyrics_url + with pytest.raises(AttributeError): + instrumental_lyrics_item.lyrics_language + with pytest.raises(AttributeError): + instrumental_lyrics_item.lyrics_translation_language + + # remove cached initial db tables data + del helper.lib.db_tables + assert helper.lib.migration_exists( + "lyrics_metadata_in_flex_fields", "items" + ) diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 560f9101f..586720afd 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -330,7 +330,7 @@ class TestLyricsPlugin(LyricsPluginMixin): monkeypatch.setattr( lyrics_plugin, "find_lyrics", - lambda _: Lyrics(found, "") if found is not None else None, + lambda _: Lyrics(found) if found is not None else None, ) item = helper.create_item(id=1, lyrics=old_lyrics) @@ -711,7 +711,7 @@ class TestTranslation: [Chorus] No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) Ridin' through the thunder, lightnin'""", - Lyrics("", ""), + Lyrics(""), """ [Refrain: Doja Cat] / [Refrain : Doja Cat] Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) @@ -727,7 +727,7 @@ class TestTranslation: [00:00.50] [00:01.00] Some more synced lyrics """, - Lyrics("", ""), + Lyrics(""), """ [00:00.00] Some synced lyrics / Quelques paroles synchronisées [00:00.50] @@ -736,7 +736,7 @@ class TestTranslation: ), pytest.param( "Quelques paroles", - Lyrics("", ""), + Lyrics(""), "Quelques paroles", id="already in the target language", ), @@ -744,7 +744,6 @@ class TestTranslation: "Some lyrics", Lyrics( "Some lyrics / Some translation", - "", language="EN", translation_language="FR", ), @@ -758,10 +757,44 @@ class TestTranslation: bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"]) assert bing.translate( - Lyrics(textwrap.dedent(new_lyrics), ""), old_lyrics + Lyrics(textwrap.dedent(new_lyrics)), old_lyrics ).full_text == textwrap.dedent(expected) +class TestLegacyLyrics: + def test_instrumental_lyrics(self): + lyrics = Lyrics( + "[Instrumental]", "lrclib", url="https://lrclib.net/api/1" + ) + + assert lyrics.full_text == "[Instrumental]" + assert lyrics.backend == "lrclib" + assert lyrics.url == "https://lrclib.net/api/1" + assert lyrics.language is None + assert lyrics.translation_language is None + + def test_from_legacy_text(self): + text = textwrap.dedent(""" + [00:00.00] Some synced lyrics / Quelques paroles synchronisées + [00:00.50] + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées + + Source: https://lrclib.net/api/1/""") + + lyrics = Lyrics.from_legacy_text(text) + + assert lyrics.full_text == textwrap.dedent( + """ + [00:00.00] Some synced lyrics / Quelques paroles synchronisées + [00:00.50] + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées""" + ) + assert lyrics.backend == "lrclib" + assert lyrics.url == "https://lrclib.net/api/1/" + assert lyrics.language == "EN" + assert lyrics.translation_language == "FR" + + class TestRestFiles: @pytest.fixture def rest_dir(self, tmp_path): From 50a564e66d09e1723f9252a0a33be11ebf5ba585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Thu, 5 Mar 2026 15:49:38 +0000 Subject: [PATCH 09/12] Set constant langdetection seed for stable results --- beetsplug/lyrics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index e89f99d0e..dac7c0c7d 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -249,6 +249,9 @@ class Lyrics: except ImportError: return + # Set seed to 0 for deterministic results + langdetect.DetectorFactory.seed = 0 + if not self.text or self.text == INSTRUMENTAL_LYRICS: return From 05a822dd5c0494d97bc4a89f5e06f3c3013b74f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Thu, 5 Mar 2026 17:46:32 +0000 Subject: [PATCH 10/12] Move Lyrics class under beets.util as it is used by migrations --- beets/library/migrations.py | 3 +- beets/util/lyrics.py | 132 ++++++++++++++++++++++++++++++++++++ beetsplug/lyrics.py | 124 +-------------------------------- test/plugins/test_lyrics.py | 36 +--------- test/util/test_lyrics.py | 37 ++++++++++ 5 files changed, 174 insertions(+), 158 deletions(-) create mode 100644 beets/util/lyrics.py create mode 100644 test/util/test_lyrics.py diff --git a/beets/library/migrations.py b/beets/library/migrations.py index 0dd72abfe..f6b8ef52d 100644 --- a/beets/library/migrations.py +++ b/beets/library/migrations.py @@ -11,6 +11,7 @@ from beets import ui from beets.dbcore.db import Migration from beets.dbcore.types import MULTI_VALUE_DELIMITER from beets.util import unique_list +from beets.util.lyrics import Lyrics if TYPE_CHECKING: from collections.abc import Iterator @@ -129,8 +130,6 @@ class LyricsMetadataInFlexFieldsMigration(Migration): if not to_migrate: return - from beetsplug.lyrics import Lyrics - migrated = total - len(to_migrate) ui.print_(f"Migrating lyrics for {total} {table}...") diff --git a/beets/util/lyrics.py b/beets/util/lyrics.py new file mode 100644 index 000000000..a4cf2fe0e --- /dev/null +++ b/beets/util/lyrics.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import re +from contextlib import suppress +from dataclasses import dataclass, field +from functools import cached_property +from typing import TYPE_CHECKING, Any +from urllib.parse import urlparse + +from beets.util import unique_list + +if TYPE_CHECKING: + from beets.library import Item + +INSTRUMENTAL_LYRICS = "[Instrumental]" +BACKEND_NAMES = {"genius", "musixmatch", "lrclib", "tekstowo"} + + +@dataclass +class Lyrics: + ORIGINAL_PAT = re.compile(r"[^\n]+ / ") + TRANSLATION_PAT = re.compile(r" / [^\n]+") + LINE_PARTS_PAT = re.compile(r"^(\[\d\d:\d\d\.\d\d\]|) *(.*)$") + + text: str + backend: str | None = None + url: str | None = None + language: str | None = None + translation_language: str | None = None + translations: list[str] = field(default_factory=list) + + def __post_init__(self) -> None: + try: + import langdetect + except ImportError: + return + + # Set seed to 0 for deterministic results + langdetect.DetectorFactory.seed = 0 + + if not self.text or self.text == INSTRUMENTAL_LYRICS: + return + + if not self.language: + with suppress(langdetect.LangDetectException): + self.language = langdetect.detect(self.original_text).upper() + + if not self.translation_language: + all_lines = self.text.splitlines() + lines_with_delimiter_count = sum( + 1 for ln in all_lines if " / " in ln + ) + if lines_with_delimiter_count >= len(all_lines) / 2: + # we are confident we are dealing with translations + with suppress(langdetect.LangDetectException): + self.translation_language = langdetect.detect( + self.ORIGINAL_PAT.sub("", self.text) + ).upper() + + @classmethod + def from_legacy_text(cls, text: str) -> Lyrics: + data: dict[str, Any] = {} + data["text"], *suffix = text.split("\n\nSource: ") + if suffix: + url = suffix[0].strip() + url_root = urlparse(url).netloc.removeprefix("www.").split(".")[0] + data.update( + url=url, + backend=url_root if url_root in BACKEND_NAMES else "google", + ) + + return cls(**data) + + @classmethod + def from_item(cls, item: Item) -> Lyrics: + data = {"text": item.lyrics} + for key in ("backend", "url", "language", "translation_language"): + data[key] = item.get(f"lyrics_{key}", with_album=False) + + return cls(**data) + + @cached_property + def original_text(self) -> str: + """Return the original text without translations.""" + # Remove translations from the lyrics text. + return self.TRANSLATION_PAT.sub("", self.text).strip() + + @cached_property + def _split_lines(self) -> list[tuple[str, str]]: + """Append translations to the given lyrics texts. + + Lines may contain timestamps from LRCLib which need to be temporarily + removed for the translation. They can take any of these forms: + - empty + Text - text only + [00:00.00] - timestamp only + [00:00.00] Text - timestamp with text + """ + return [ + (m[1], m[2]) if (m := self.LINE_PARTS_PAT.match(line)) else ("", "") + for line in self.text.splitlines() + ] + + @cached_property + def timestamps(self) -> list[str]: + return [ts for ts, _ in self._split_lines] + + @cached_property + def text_lines(self) -> list[str]: + return [ln for _, ln in self._split_lines] + + @property + def synced(self) -> bool: + return any(self.timestamps) + + @property + def translated(self) -> bool: + return bool(self.translation_language) + + @property + def full_text(self) -> str: + if not self.translations: + return self.text + + text_pairs = list(zip(self.text_lines, self.translations)) + + # only add the separator for non-empty and differing translations + texts = [" / ".join(unique_list(filter(None, p))) for p in text_pairs] + # only add the space between non-empty timestamps and texts + return "\n".join( + " ".join(filter(None, p)) for p in zip(self.timestamps, texts) + ) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index dac7c0c7d..5f8caa036 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -21,12 +21,12 @@ import math import re import textwrap from contextlib import contextmanager, suppress -from dataclasses import dataclass, field +from dataclasses import dataclass from functools import cached_property, partial, total_ordering from html import unescape from itertools import groupby from pathlib import Path -from typing import TYPE_CHECKING, Any, ClassVar, NamedTuple +from typing import TYPE_CHECKING, ClassVar, NamedTuple from urllib.parse import quote, quote_plus, urlencode, urlparse import requests @@ -36,8 +36,8 @@ from unidecode import unidecode from beets import plugins, ui from beets.autotag.distance import string_dist from beets.dbcore import types -from beets.util import unique_list from beets.util.config import sanitize_choices +from beets.util.lyrics import INSTRUMENTAL_LYRICS, Lyrics from ._utils.requests import HTTPNotFoundError, RequestHandler @@ -58,8 +58,6 @@ if TYPE_CHECKING: TranslatorAPI, ) -INSTRUMENTAL_LYRICS = "[Instrumental]" - class CaptchaError(requests.exceptions.HTTPError): def __init__(self, *args, **kwargs) -> None: @@ -230,122 +228,6 @@ class LyricsRequestHandler(RequestHandler): self.warn("Request error: {}", exc) -@dataclass -class Lyrics: - ORIGINAL_PAT = re.compile(r"[^\n]+ / ") - TRANSLATION_PAT = re.compile(r" / [^\n]+") - LINE_PARTS_PAT = re.compile(r"^(\[\d\d:\d\d\.\d\d\]|) *(.*)$") - - text: str - backend: str | None = None - url: str | None = None - language: str | None = None - translation_language: str | None = None - translations: list[str] = field(default_factory=list) - - def __post_init__(self) -> None: - try: - import langdetect - except ImportError: - return - - # Set seed to 0 for deterministic results - langdetect.DetectorFactory.seed = 0 - - if not self.text or self.text == INSTRUMENTAL_LYRICS: - return - - if not self.language: - with suppress(langdetect.LangDetectException): - self.language = langdetect.detect(self.original_text).upper() - - if not self.translation_language: - all_lines = self.text.splitlines() - lines_with_delimiter_count = sum( - 1 for ln in all_lines if " / " in ln - ) - if lines_with_delimiter_count >= len(all_lines) / 2: - # we are confident we are dealing with translations - with suppress(langdetect.LangDetectException): - self.translation_language = langdetect.detect( - self.ORIGINAL_PAT.sub("", self.text) - ).upper() - - @classmethod - def from_legacy_text(cls, text: str) -> Lyrics: - data: dict[str, Any] = {} - data["text"], *suffix = text.split("\n\nSource: ") - if suffix: - url = suffix[0].strip() - url_root = urlparse(url).netloc.removeprefix("www.").split(".")[0] - data.update( - url=url, - backend=url_root if url_root in BACKEND_BY_NAME else "google", - ) - - return cls(**data) - - @classmethod - def from_item(cls, item: Item) -> Lyrics: - data = {"text": item.lyrics} - for key in ("backend", "url", "language", "translation_language"): - data[key] = item.get(f"lyrics_{key}", with_album=False) - - return cls(**data) - - @cached_property - def original_text(self) -> str: - """Return the original text without translations.""" - # Remove translations from the lyrics text. - return self.TRANSLATION_PAT.sub("", self.text).strip() - - @cached_property - def _split_lines(self) -> list[tuple[str, str]]: - """Append translations to the given lyrics texts. - - Lines may contain timestamps from LRCLib which need to be temporarily - removed for the translation. They can take any of these forms: - - empty - Text - text only - [00:00.00] - timestamp only - [00:00.00] Text - timestamp with text - """ - return [ - (m[1], m[2]) if (m := self.LINE_PARTS_PAT.match(line)) else ("", "") - for line in self.text.splitlines() - ] - - @cached_property - def timestamps(self) -> list[str]: - return [ts for ts, _ in self._split_lines] - - @cached_property - def text_lines(self) -> list[str]: - return [ln for _, ln in self._split_lines] - - @property - def synced(self) -> bool: - return any(self.timestamps) - - @property - def translated(self) -> bool: - return bool(self.translation_language) - - @property - def full_text(self) -> str: - if not self.translations: - return self.text - - text_pairs = list(zip(self.text_lines, self.translations)) - - # only add the separator for non-empty and differing translations - texts = [" / ".join(unique_list(filter(None, p))) for p in text_pairs] - # only add the space between non-empty timestamps and texts - return "\n".join( - " ".join(filter(None, p)) for p in zip(self.timestamps, texts) - ) - - class BackendClass(type): @property def name(cls) -> str: diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 586720afd..5af864f92 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -27,8 +27,8 @@ import requests from beets.library import Item from beets.test.helper import PluginMixin, TestHelper +from beets.util.lyrics import Lyrics from beetsplug import lyrics -from beetsplug.lyrics import Lyrics from .lyrics_pages import lyrics_pages @@ -761,40 +761,6 @@ class TestTranslation: ).full_text == textwrap.dedent(expected) -class TestLegacyLyrics: - def test_instrumental_lyrics(self): - lyrics = Lyrics( - "[Instrumental]", "lrclib", url="https://lrclib.net/api/1" - ) - - assert lyrics.full_text == "[Instrumental]" - assert lyrics.backend == "lrclib" - assert lyrics.url == "https://lrclib.net/api/1" - assert lyrics.language is None - assert lyrics.translation_language is None - - def test_from_legacy_text(self): - text = textwrap.dedent(""" - [00:00.00] Some synced lyrics / Quelques paroles synchronisées - [00:00.50] - [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées - - Source: https://lrclib.net/api/1/""") - - lyrics = Lyrics.from_legacy_text(text) - - assert lyrics.full_text == textwrap.dedent( - """ - [00:00.00] Some synced lyrics / Quelques paroles synchronisées - [00:00.50] - [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées""" - ) - assert lyrics.backend == "lrclib" - assert lyrics.url == "https://lrclib.net/api/1/" - assert lyrics.language == "EN" - assert lyrics.translation_language == "FR" - - class TestRestFiles: @pytest.fixture def rest_dir(self, tmp_path): diff --git a/test/util/test_lyrics.py b/test/util/test_lyrics.py new file mode 100644 index 000000000..4899f7133 --- /dev/null +++ b/test/util/test_lyrics.py @@ -0,0 +1,37 @@ +import textwrap + +from beets.util.lyrics import Lyrics + + +class TestLyrics: + def test_instrumental_lyrics(self): + lyrics = Lyrics( + "[Instrumental]", "lrclib", url="https://lrclib.net/api/1" + ) + + assert lyrics.full_text == "[Instrumental]" + assert lyrics.backend == "lrclib" + assert lyrics.url == "https://lrclib.net/api/1" + assert lyrics.language is None + assert lyrics.translation_language is None + + def test_from_legacy_text(self): + text = textwrap.dedent(""" + [00:00.00] Some synced lyrics / Quelques paroles synchronisées + [00:00.50] + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées + + Source: https://lrclib.net/api/1/""") + + lyrics = Lyrics.from_legacy_text(text) + + assert lyrics.full_text == textwrap.dedent( + """ + [00:00.00] Some synced lyrics / Quelques paroles synchronisées + [00:00.50] + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées""" + ) + assert lyrics.backend == "lrclib" + assert lyrics.url == "https://lrclib.net/api/1/" + assert lyrics.language == "EN" + assert lyrics.translation_language == "FR" From e8605747d691242f9056cac7f6c5cff80565e721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Thu, 5 Mar 2026 17:49:54 +0000 Subject: [PATCH 11/12] Document migrations --- docs/api/database.rst | 1 + docs/dev/library.rst | 47 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/docs/api/database.rst b/docs/api/database.rst index b8c2235a2..4a845d559 100644 --- a/docs/api/database.rst +++ b/docs/api/database.rst @@ -29,6 +29,7 @@ Transactions .. autosummary:: :toctree: generated/ + Migration Transaction Queries diff --git a/docs/dev/library.rst b/docs/dev/library.rst index 8b854937d..1ce22df08 100644 --- a/docs/dev/library.rst +++ b/docs/dev/library.rst @@ -166,6 +166,53 @@ rolling back the transaction if an error occurs. .. _blog post: https://beets.io/blog/sqlite-nightmare.html +Migrations +~~~~~~~~~~ + +The database layer includes a first-class migration system for data changes that +must happen alongside schema evolution. This keeps compatibility work explicit, +testable, and isolated from normal query and model code. + +Each database subclass declares its migrations in ``_migrations`` as pairs of a +migration class and the model classes it applies to. During startup, the +database creates required tables and columns first, then executes configured +migrations. + +Migration completion is tracked in a dedicated ``migrations`` table keyed by +migration name and table name. This means each migration runs at most once per +table, so large one-time data rewrites can be safely coordinated across +restarts. + +The migration name is derived from the migration class name. Because that name +is the persisted identity in the ``migrations`` table, renaming a released +migration class changes its identity and can cause the migration to run again. +Treat migration class names as stable once shipped. + +For example, ``MultiGenreFieldMigration`` becomes ``multi_genre_field``. After +it runs for the ``items`` table, beets records a row equivalent to: + +.. code-block:: text + + name = "multi_genre_field", table_name = "items" + +Common use cases include: + +1. Backfilling a newly introduced canonical field from older data. +2. Normalizing legacy free-form values into a structured representation. +3. Splitting mixed-content legacy fields into cleaned primary content plus + auxiliary metadata stored as flexible attributes. + +To add a migration: + +1. Create a :class:`beets.dbcore.db.Migration` subclass. +2. Implement the table-specific data rewrite logic in ``_migrate_data``. +3. Register the migration in the database subclass ``_migrations`` list for the + target models. + +In practice, migrations should be idempotent and conservative: gate behavior on +the current schema when needed, keep writes transactional, and batch large +updates so startup remains predictable for real libraries. + Queries ------- From ace6a99d07094394671251372f63068d4c4a3a3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Thu, 5 Mar 2026 18:39:58 +0000 Subject: [PATCH 12/12] Document methods changed/introduced --- beets/dbcore/db.py | 6 ++++-- beets/library/migrations.py | 6 ++++++ beets/util/lyrics.py | 25 ++++++++++++++++++------- beetsplug/lyrics.py | 10 ++++++++++ 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 13c995829..9d7469689 100755 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -1056,6 +1056,8 @@ class Transaction: @dataclass class Migration(ABC): + """Define a one-time data migration that runs during database startup.""" + db: Database @cached_classproperty @@ -1066,7 +1068,7 @@ class Migration(ABC): @contextmanager def with_row_factory(self, factory: type[NamedTuple]) -> Iterator[None]: - """Temporarily set the row factory to a specific type.""" + """Temporarily decode query rows into a typed tuple shape.""" original_factory = self.db._connection().row_factory self.db._connection().row_factory = lambda _, row: factory(*row) try: @@ -1075,7 +1077,7 @@ class Migration(ABC): self.db._connection().row_factory = original_factory def migrate_model(self, model_cls: type[Model], *args, **kwargs) -> None: - """Migrate a specific table.""" + """Run this migration once for a model's backing table.""" table = model_cls._table if not self.db.migration_exists(self.name, table): self._migrate_data(model_cls, *args, **kwargs) diff --git a/beets/library/migrations.py b/beets/library/migrations.py index f6b8ef52d..30501dab1 100644 --- a/beets/library/migrations.py +++ b/beets/library/migrations.py @@ -34,8 +34,11 @@ def chunks(lst: list[T], n: int) -> Iterator[list[T]]: class MultiGenreFieldMigration(Migration): + """Backfill multi-value genres from legacy single-string genre data.""" + @cached_property def separators(self) -> list[str]: + """Return known separators that indicate multiple legacy genres.""" separators = [] with suppress(ConfigError): separators.append(beets.config["lastgenre"]["separator"].as_str()) @@ -44,6 +47,7 @@ class MultiGenreFieldMigration(Migration): return unique_list(filter(None, separators)) def get_genres(self, genre: str) -> str: + """Normalize legacy genre separators to the canonical delimiter.""" for separator in self.separators: if separator in genre: return genre.replace(separator, MULTI_VALUE_DELIMITER) @@ -100,6 +104,8 @@ class LyricsRow(NamedTuple): class LyricsMetadataInFlexFieldsMigration(Migration): + """Move legacy inline lyrics metadata into dedicated flexible fields.""" + def _migrate_data(self, model_cls: type[Model], _: set[str]) -> None: """Migrate legacy lyrics to move metadata to flex attributes.""" table = model_cls._table diff --git a/beets/util/lyrics.py b/beets/util/lyrics.py index a4cf2fe0e..76607805e 100644 --- a/beets/util/lyrics.py +++ b/beets/util/lyrics.py @@ -18,6 +18,13 @@ BACKEND_NAMES = {"genius", "musixmatch", "lrclib", "tekstowo"} @dataclass class Lyrics: + """Represent lyrics text together with structured source metadata. + + This value object keeps the canonical lyrics body, optional provenance, and + optional translation metadata synchronized across fetching, translation, and + persistence. + """ + ORIGINAL_PAT = re.compile(r"[^\n]+ / ") TRANSLATION_PAT = re.compile(r" / [^\n]+") LINE_PARTS_PAT = re.compile(r"^(\[\d\d:\d\d\.\d\d\]|) *(.*)$") @@ -30,6 +37,7 @@ class Lyrics: translations: list[str] = field(default_factory=list) def __post_init__(self) -> None: + """Populate missing language metadata from the current text.""" try: import langdetect except ImportError: @@ -59,6 +67,7 @@ class Lyrics: @classmethod def from_legacy_text(cls, text: str) -> Lyrics: + """Build lyrics from legacy text that may include an inline source.""" data: dict[str, Any] = {} data["text"], *suffix = text.split("\n\nSource: ") if suffix: @@ -73,6 +82,7 @@ class Lyrics: @classmethod def from_item(cls, item: Item) -> Lyrics: + """Build lyrics from an item's canonical text and flexible metadata.""" data = {"text": item.lyrics} for key in ("backend", "url", "language", "translation_language"): data[key] = item.get(f"lyrics_{key}", with_album=False) @@ -87,14 +97,10 @@ class Lyrics: @cached_property def _split_lines(self) -> list[tuple[str, str]]: - """Append translations to the given lyrics texts. + """Split lyrics into timestamp/text pairs for line-wise processing. - Lines may contain timestamps from LRCLib which need to be temporarily - removed for the translation. They can take any of these forms: - - empty - Text - text only - [00:00.00] - timestamp only - [00:00.00] Text - timestamp with text + Timestamps, when present, are kept separate so callers can translate or + normalize text without losing synced timing information. """ return [ (m[1], m[2]) if (m := self.LINE_PARTS_PAT.match(line)) else ("", "") @@ -103,22 +109,27 @@ class Lyrics: @cached_property def timestamps(self) -> list[str]: + """Return per-line timestamp prefixes from the lyrics text.""" return [ts for ts, _ in self._split_lines] @cached_property def text_lines(self) -> list[str]: + """Return per-line lyric text with timestamps removed.""" return [ln for _, ln in self._split_lines] @property def synced(self) -> bool: + """Return whether the lyrics contain synced timestamp markers.""" return any(self.timestamps) @property def translated(self) -> bool: + """Return whether translation metadata is available.""" return bool(self.translation_language) @property def full_text(self) -> str: + """Return canonical text with translations merged when available.""" if not self.translations: return self.text diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 5f8caa036..f6a873ffe 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -245,12 +245,15 @@ class Backend(LyricsRequestHandler, metaclass=BackendClass): def fetch( self, artist: str, title: str, album: str, length: int ) -> Lyrics | None: + """Return lyrics for a song, or ``None`` when no match is found.""" raise NotImplementedError @dataclass @total_ordering class LRCLyrics: + """Hold LRCLib candidate data and ranking helpers for matching.""" + #: Percentage tolerance for max duration difference between lyrics and item. DURATION_DIFF_TOLERANCE = 0.05 @@ -269,6 +272,7 @@ class LRCLyrics: def verify_synced_lyrics( cls, duration: float, lyrics: str | None ) -> str | None: + """Accept synced lyrics only when the final timestamp fits duration.""" if lyrics and ( m := Lyrics.LINE_PARTS_PAT.match(lyrics.splitlines()[-1]) ): @@ -284,6 +288,7 @@ class LRCLyrics: def make( cls, candidate: LRCLibAPI.Item, target_duration: float ) -> LRCLyrics: + """Build a scored candidate from LRCLib payload data.""" duration = candidate["duration"] or 0.0 return cls( target_duration, @@ -326,6 +331,7 @@ class LRCLyrics: return not self.synced, self.duration_dist def get_text(self, want_synced: bool) -> str: + """Return the preferred text form for this candidate.""" if self.instrumental: return INSTRUMENTAL_LYRICS @@ -761,6 +767,8 @@ class Google(SearchBackend): @dataclass class Translator(LyricsRequestHandler): + """Translate lyrics text while preserving existing structured metadata.""" + TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" SEPARATOR = " | " @@ -777,6 +785,7 @@ class Translator(LyricsRequestHandler): to_language: str, from_languages: list[str] | None = None, ) -> Translator: + """Construct a translator with normalized language configuration.""" return cls( log, api_key, @@ -1060,6 +1069,7 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin): self.add_item_lyrics(item, False) def find_lyrics(self, item: Item) -> Lyrics | None: + """Return the first lyrics match from the configured source search.""" album, length = item.album, round(item.length) matches = ( self.get_lyrics(a, t, album, length)