mirror of
https://github.com/beetbox/beets.git
synced 2026-03-07 05:34:40 +01:00
Add lyrics_url, lyrics_backend flex attrs, improve lrclib reliability (#6393)
Fixes: #6370 This PR completes the lyrics pipeline refactor around a structured `Lyrics` value object and aligns storage, migration, and docs with that model. At a high level, lyrics handling is now end-to-end structured instead of ad-hoc string/tuple flows: fetchers return `Lyrics`, translation operates on `Lyrics`, and persistence writes both canonical text and structured metadata. High-level impact: - Backends now return `Lyrics` instead of `(text, url)` tuples. - Lyrics source metadata is no longer embedded in `item.lyrics` as a `Source: ...` suffix. - Lyrics metadata is stored in flexible fields: `lyrics_backend`, `lyrics_url`, `lyrics_language`, `lyrics_translation_language`. - Existing libraries are automatically migrated on first run by a one-time data migration that: normalizes legacy mixed-content lyrics text and moves auxiliary metadata into flex fields. - Sync safety is improved: with `synced` enabled, existing synced lyrics are not replaced by newly fetched plain lyrics, even with `force`. - LRCLib synced lyrics validation is stricter: synced results are accepted only when the final synced timestamp is consistent with track duration. Docs and tests: - Lyrics plugin docs now describe the new flexible metadata fields and synced replacement behavior. - Developer docs now document migration lifecycle, class-name-based migration identity, and migration use cases. - Changelog updated for all user-visible behavior changes. - Tests were expanded/updated for migration behavior, backend return types, translation behavior, synced-lyrics safety, and LRCLib duration validation.
This commit is contained in:
commit
a6ac5eff5b
14 changed files with 749 additions and 177 deletions
|
|
@ -1056,6 +1056,8 @@ class Transaction:
|
|||
|
||||
@dataclass
|
||||
class Migration(ABC):
|
||||
"""Define a one-time data migration that runs during database startup."""
|
||||
|
||||
db: Database
|
||||
|
||||
@cached_classproperty
|
||||
|
|
@ -1064,15 +1066,28 @@ class Migration(ABC):
|
|||
name = cls.__name__.removesuffix("Migration") # type: ignore[attr-defined]
|
||||
return re.sub(r"(?<=[a-z])(?=[A-Z])", "_", name).lower()
|
||||
|
||||
def migrate_table(self, table: str, *args, **kwargs) -> None:
|
||||
"""Migrate a specific table."""
|
||||
@contextmanager
def with_row_factory(self, factory: type[NamedTuple]) -> Iterator[None]:
    """Decode query rows as ``factory`` instances inside the ``with`` body.

    The row factory that was set on the database connection beforehand is
    put back when the block exits, including when the body raises.
    """
    connection_for = self.db._connection
    previous_factory = connection_for().row_factory

    def build_row(_cursor, row):
        # Spread the raw row positionally into the typed tuple.
        return factory(*row)

    connection_for().row_factory = build_row
    try:
        yield
    finally:
        connection_for().row_factory = previous_factory
|
||||
|
||||
def migrate_model(self, model_cls: type[Model], *args, **kwargs) -> None:
|
||||
"""Run this migration once for a model's backing table."""
|
||||
table = model_cls._table
|
||||
if not self.db.migration_exists(self.name, table):
|
||||
self._migrate_data(table, *args, **kwargs)
|
||||
self._migrate_data(model_cls, *args, **kwargs)
|
||||
self.db.record_migration(self.name, table)
|
||||
|
||||
@abstractmethod
|
||||
def _migrate_data(self, table: str, current_fields: set[str]) -> None:
|
||||
"""Migrate data for a specific table."""
|
||||
def _migrate_data(
|
||||
self, model_cls: type[Model], current_fields: set[str]
|
||||
) -> None:
|
||||
"""Migrate data for a specific model."""
|
||||
|
||||
|
||||
class TableInfo(TypedDict):
|
||||
|
|
@ -1375,8 +1390,9 @@ class Database:
|
|||
for migration_cls, model_classes in self._migrations:
|
||||
migration = migration_cls(self)
|
||||
for model_cls in model_classes:
|
||||
table = model_cls._table
|
||||
migration.migrate_table(table, self.db_tables[table]["columns"])
|
||||
migration.migrate_model(
|
||||
model_cls, self.db_tables[model_cls._table]["columns"]
|
||||
)
|
||||
|
||||
def migration_exists(self, name: str, table: str) -> bool:
|
||||
"""Return whether a named migration has been marked complete."""
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import beets
|
|||
from beets import dbcore
|
||||
from beets.util import normpath
|
||||
|
||||
from .migrations import MultiGenreFieldMigration
|
||||
from . import migrations
|
||||
from .models import Album, Item
|
||||
from .queries import PF_KEY_DEFAULT, parse_query_parts, parse_query_string
|
||||
|
||||
|
|
@ -20,7 +20,10 @@ class Library(dbcore.Database):
|
|||
"""A database of music containing songs and albums."""
|
||||
|
||||
_models = (Item, Album)
|
||||
_migrations = ((MultiGenreFieldMigration, (Item, Album)),)
|
||||
_migrations = (
|
||||
(migrations.MultiGenreFieldMigration, (Item, Album)),
|
||||
(migrations.LyricsMetadataInFlexFieldsMigration, (Item,)),
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager, suppress
|
||||
from contextlib import suppress
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING, NamedTuple, TypeVar
|
||||
|
||||
|
|
@ -11,10 +11,13 @@ from beets import ui
|
|||
from beets.dbcore.db import Migration
|
||||
from beets.dbcore.types import MULTI_VALUE_DELIMITER
|
||||
from beets.util import unique_list
|
||||
from beets.util.lyrics import Lyrics
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterator
|
||||
|
||||
from beets.dbcore.db import Model
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
|
|
@ -31,8 +34,11 @@ def chunks(lst: list[T], n: int) -> Iterator[list[T]]:
|
|||
|
||||
|
||||
class MultiGenreFieldMigration(Migration):
|
||||
"""Backfill multi-value genres from legacy single-string genre data."""
|
||||
|
||||
@cached_property
|
||||
def separators(self) -> list[str]:
|
||||
"""Return known separators that indicate multiple legacy genres."""
|
||||
separators = []
|
||||
with suppress(ConfigError):
|
||||
separators.append(beets.config["lastgenre"]["separator"].as_str())
|
||||
|
|
@ -40,30 +46,25 @@ class MultiGenreFieldMigration(Migration):
|
|||
separators.extend(["; ", ", ", " / "])
|
||||
return unique_list(filter(None, separators))
|
||||
|
||||
@contextmanager
|
||||
def with_factory(self, factory: type[NamedTuple]) -> Iterator[None]:
|
||||
"""Temporarily set the row factory to a specific type."""
|
||||
original_factory = self.db._connection().row_factory
|
||||
self.db._connection().row_factory = lambda _, row: factory(*row)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.db._connection().row_factory = original_factory
|
||||
|
||||
def get_genres(self, genre: str) -> str:
|
||||
"""Normalize legacy genre separators to the canonical delimiter."""
|
||||
for separator in self.separators:
|
||||
if separator in genre:
|
||||
return genre.replace(separator, MULTI_VALUE_DELIMITER)
|
||||
|
||||
return genre
|
||||
|
||||
def _migrate_data(self, table: str, current_fields: set[str]) -> None:
|
||||
def _migrate_data(
|
||||
self, model_cls: type[Model], current_fields: set[str]
|
||||
) -> None:
|
||||
"""Migrate legacy genre values to the multi-value genres field."""
|
||||
if "genre" not in current_fields:
|
||||
# No legacy genre field, so nothing to migrate.
|
||||
return
|
||||
|
||||
with self.db.transaction() as tx, self.with_factory(GenreRow):
|
||||
table = model_cls._table
|
||||
|
||||
with self.db.transaction() as tx, self.with_row_factory(GenreRow):
|
||||
rows: list[GenreRow] = tx.query( # type: ignore[assignment]
|
||||
f"""
|
||||
SELECT id, genre, genres
|
||||
|
|
@ -95,3 +96,88 @@ class MultiGenreFieldMigration(Migration):
|
|||
)
|
||||
|
||||
ui.print_(f"Migration complete: {migrated} of {total} {table} updated")
|
||||
|
||||
|
||||
class LyricsRow(NamedTuple):
    """Typed shape of one ``SELECT id, lyrics`` result row."""

    # Primary key of the row in the model's table.
    id: int
    # Stored lyrics text, as read from the ``lyrics`` column.
    lyrics: str
|
||||
|
||||
|
||||
class LyricsMetadataInFlexFieldsMigration(Migration):
    """Move legacy inline lyrics metadata into dedicated flexible fields."""

    def _migrate_data(self, model_cls: type[Model], _: set[str]) -> None:
        """Rewrite legacy lyrics text and store its metadata as flex rows.

        Rows that already have a ``lyrics_backend`` flex attribute are
        treated as migrated and skipped. The remaining rows are processed
        in batches of 100, each batch in its own transaction.
        """
        table = model_cls._table
        flex_table = model_cls._flex_table

        with self.db.transaction() as tx:
            flagged = tx.query(
                f"""
                SELECT entity_id
                FROM {flex_table}
                WHERE key == 'lyrics_backend'
                """
            )
            migrated_ids = {entry[0] for entry in flagged}

        with self.db.transaction() as tx, self.with_row_factory(LyricsRow):
            rows: list[LyricsRow] = tx.query(  # type: ignore[assignment]
                f"""
                SELECT id, lyrics
                FROM {table}
                WHERE lyrics IS NOT NULL AND lyrics != ''
                """
            )

        total = len(rows)
        pending = [row for row in rows if row.id not in migrated_ids]
        if not pending:
            return

        # Rows skipped above count as already migrated in the progress output.
        migrated = total - len(pending)

        ui.print_(f"Migrating lyrics for {total} {table}...")
        metadata_fields = ("backend", "url", "language", "translation_language")
        for batch in chunks(pending, 100):
            parsed = [
                (row, Lyrics.from_legacy_text(row.lyrics)) for row in batch
            ]

            # Only touch the lyrics column when parsing changed the text.
            update_rows = [
                (lyr.full_text, row.id)
                for row, lyr in parsed
                if lyr.full_text != row.lyrics
            ]

            # Only insert flex rows for non-null metadata values
            flex_rows = []
            for row, lyr in parsed:
                for field_name in metadata_fields:
                    value = getattr(lyr, field_name)
                    if value is not None:
                        flex_rows.append(
                            (row.id, f"lyrics_{field_name}", value)
                        )

            with self.db.transaction() as tx:
                if update_rows:
                    tx.mutate_many(
                        f"UPDATE {table} SET lyrics = ? WHERE id = ?",
                        update_rows,
                    )
                if flex_rows:
                    tx.mutate_many(
                        f"""
                        INSERT INTO {flex_table} (entity_id, key, value)
                        VALUES (?, ?, ?)
                        """,
                        flex_rows,
                    )

            migrated += len(batch)

            ui.print_(
                f"  Migrated {migrated} {table} "
                f"({migrated}/{total} processed)..."
            )

        ui.print_(f"Migration complete: {migrated} of {total} {table} updated")
|
||||
|
|
|
|||
143
beets/util/lyrics.py
Normal file
143
beets/util/lyrics.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from contextlib import suppress
|
||||
from dataclasses import dataclass, field
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from beets.util import unique_list
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beets.library import Item
|
||||
|
||||
INSTRUMENTAL_LYRICS = "[Instrumental]"
BACKEND_NAMES = {"genius", "musixmatch", "lrclib", "tekstowo"}


@dataclass
class Lyrics:
    """Represent lyrics text together with structured source metadata.

    This value object keeps the canonical lyrics body, optional provenance, and
    optional translation metadata synchronized across fetching, translation, and
    persistence.
    """

    # Matches the "<original> / " prefix of a translated line.
    ORIGINAL_PAT = re.compile(r"[^\n]+ / ")
    # Matches the " / <translation>" suffix of a translated line.
    TRANSLATION_PAT = re.compile(r" / [^\n]+")
    # Splits a line into an optional "[mm:ss.xx]" timestamp and the text.
    LINE_PARTS_PAT = re.compile(r"^(\[\d\d:\d\d\.\d\d\]|) *(.*)$")
    # Marker that introduces an inline source URL in legacy lyrics text.
    LEGACY_SOURCE_SEP = "\n\nSource: "

    text: str
    backend: str | None = None
    url: str | None = None
    language: str | None = None
    translation_language: str | None = None
    translations: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Populate missing language metadata from the current text.

        Does nothing when ``langdetect`` cannot be imported, or when the text
        is empty or the instrumental placeholder.
        """
        try:
            import langdetect
        except ImportError:
            return

        # Set seed to 0 for deterministic results
        langdetect.DetectorFactory.seed = 0

        if not self.text or self.text == INSTRUMENTAL_LYRICS:
            return

        if not self.language:
            with suppress(langdetect.LangDetectException):
                self.language = langdetect.detect(self.original_text).upper()

        if not self.translation_language:
            all_lines = self.text.splitlines()
            lines_with_delimiter_count = sum(
                1 for ln in all_lines if " / " in ln
            )
            if lines_with_delimiter_count >= len(all_lines) / 2:
                # we are confident we are dealing with translations
                with suppress(langdetect.LangDetectException):
                    self.translation_language = langdetect.detect(
                        self.ORIGINAL_PAT.sub("", self.text)
                    ).upper()

    @classmethod
    def from_legacy_text(cls, text: str) -> Lyrics:
        """Build lyrics from legacy text that may include inline sources.

        Legacy text can contain one or more ``"\\n\\nSource: <url>"`` markers.
        Only the first line after each marker is treated as a URL; any text
        that follows it (for example a ``---`` divider and a further lyrics
        body) is kept in the lyrics text instead of leaking into the URL.
        The first non-empty URL becomes ``url`` and determines ``backend``.
        """
        body, *source_chunks = text.split(cls.LEGACY_SOURCE_SEP)
        if not source_chunks:
            return cls(text=body)

        urls: list[str] = []
        for chunk in source_chunks:
            url_line, _, remainder = chunk.partition("\n")
            urls.append(url_line.strip())
            if remainder.strip():
                # Restore the newline consumed by ``partition`` so that the
                # remaining body keeps its original spacing.
                body += f"\n{remainder}"

        url = next(filter(None, urls), "")
        url_root = urlparse(url).netloc.removeprefix("www.").split(".")[0]
        return cls(
            text=body,
            url=url,
            # URLs that do not belong to a known backend are attributed to
            # "google".
            backend=url_root if url_root in BACKEND_NAMES else "google",
        )

    @classmethod
    def from_item(cls, item: Item) -> Lyrics:
        """Build lyrics from an item's canonical text and flexible metadata."""
        data = {"text": item.lyrics}
        for key in ("backend", "url", "language", "translation_language"):
            data[key] = item.get(f"lyrics_{key}", with_album=False)

        return cls(**data)

    @cached_property
    def original_text(self) -> str:
        """Return the original text without translations."""
        # Remove translations from the lyrics text.
        return self.TRANSLATION_PAT.sub("", self.text).strip()

    @cached_property
    def _split_lines(self) -> list[tuple[str, str]]:
        """Split lyrics into timestamp/text pairs for line-wise processing.

        Timestamps, when present, are kept separate so callers can translate or
        normalize text without losing synced timing information.
        """
        return [
            (m[1], m[2]) if (m := self.LINE_PARTS_PAT.match(line)) else ("", "")
            for line in self.text.splitlines()
        ]

    @cached_property
    def timestamps(self) -> list[str]:
        """Return per-line timestamp prefixes from the lyrics text."""
        return [ts for ts, _ in self._split_lines]

    @cached_property
    def text_lines(self) -> list[str]:
        """Return per-line lyric text with timestamps removed."""
        return [ln for _, ln in self._split_lines]

    @property
    def synced(self) -> bool:
        """Return whether the lyrics contain synced timestamp markers."""
        return any(self.timestamps)

    @property
    def translated(self) -> bool:
        """Return whether translation metadata is available."""
        return bool(self.translation_language)

    @property
    def full_text(self) -> str:
        """Return canonical text with translations merged when available."""
        if not self.translations:
            return self.text

        text_pairs = list(zip(self.text_lines, self.translations))

        # only add the separator for non-empty and differing translations
        texts = [" / ".join(unique_list(filter(None, p))) for p in text_pairs]
        # only add the space between non-empty timestamps and texts
        return "\n".join(
            " ".join(filter(None, p)) for p in zip(self.timestamps, texts)
        )
|
||||
|
|
@ -85,6 +85,15 @@ class GeniusAPI:
|
|||
class Search(TypedDict):
|
||||
response: GeniusAPI.SearchResponse
|
||||
|
||||
class StatusResponse(TypedDict):
|
||||
status: int
|
||||
message: str
|
||||
|
||||
class Meta(TypedDict):
|
||||
meta: GeniusAPI.StatusResponse
|
||||
|
||||
Response = Search | Meta
|
||||
|
||||
|
||||
class GoogleCustomSearchAPI:
|
||||
class Response(TypedDict):
|
||||
|
|
|
|||
|
|
@ -29,14 +29,15 @@ from pathlib import Path
|
|||
from typing import TYPE_CHECKING, ClassVar, NamedTuple
|
||||
from urllib.parse import quote, quote_plus, urlencode, urlparse
|
||||
|
||||
import langdetect
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from unidecode import unidecode
|
||||
|
||||
from beets import plugins, ui
|
||||
from beets.autotag.distance import string_dist
|
||||
from beets.dbcore import types
|
||||
from beets.util.config import sanitize_choices
|
||||
from beets.util.lyrics import INSTRUMENTAL_LYRICS, Lyrics
|
||||
|
||||
from ._utils.requests import HTTPNotFoundError, RequestHandler
|
||||
|
||||
|
|
@ -57,14 +58,16 @@ if TYPE_CHECKING:
|
|||
TranslatorAPI,
|
||||
)
|
||||
|
||||
INSTRUMENTAL_LYRICS = "[Instrumental]"
|
||||
|
||||
|
||||
class CaptchaError(requests.exceptions.HTTPError):
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
super().__init__("Captcha is required", *args, **kwargs)
|
||||
|
||||
|
||||
class GeniusHTTPError(requests.exceptions.HTTPError):
    """HTTP error carrying a failure status reported by the Genius API."""
|
||||
|
||||
|
||||
# Utilities.
|
||||
|
||||
|
||||
|
|
@ -241,13 +244,16 @@ class Backend(LyricsRequestHandler, metaclass=BackendClass):
|
|||
|
||||
def fetch(
|
||||
self, artist: str, title: str, album: str, length: int
|
||||
) -> tuple[str, str] | None:
|
||||
) -> Lyrics | None:
|
||||
"""Return lyrics for a song, or ``None`` when no match is found."""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@dataclass
|
||||
@total_ordering
|
||||
class LRCLyrics:
|
||||
"""Hold LRCLib candidate data and ranking helpers for matching."""
|
||||
|
||||
#: Percentage tolerance for max duration difference between lyrics and item.
|
||||
DURATION_DIFF_TOLERANCE = 0.05
|
||||
|
||||
|
|
@ -262,17 +268,37 @@ class LRCLyrics:
|
|||
"""Compare two lyrics items by their score."""
|
||||
return self.dist < other.dist
|
||||
|
||||
@classmethod
def verify_synced_lyrics(
    cls, duration: float, lyrics: str | None
) -> str | None:
    """Return ``lyrics`` only if its last line's timestamp fits ``duration``.

    ``None`` is returned when there are no lyrics, when the last line has
    no timestamp, or when that timestamp (converted to seconds) is greater
    than ``duration``.
    """
    if not lyrics:
        return None

    final_line = lyrics.splitlines()[-1]
    parts = Lyrics.LINE_PARTS_PAT.match(final_line)
    if not parts:
        return None

    stamp = parts.group(1)
    if not stamp:
        return None

    # "[mm:ss.xx]" -> minutes and (fractional) seconds.
    minutes, seconds = (float(p) for p in stamp.strip("[]").split(":"))
    if 60 * minutes + seconds <= duration:
        return lyrics

    return None
|
||||
|
||||
@classmethod
|
||||
def make(
|
||||
cls, candidate: LRCLibAPI.Item, target_duration: float
|
||||
) -> LRCLyrics:
|
||||
"""Build a scored candidate from LRCLib payload data."""
|
||||
duration = candidate["duration"] or 0.0
|
||||
return cls(
|
||||
target_duration,
|
||||
candidate["id"],
|
||||
candidate["duration"] or 0.0,
|
||||
duration,
|
||||
candidate["instrumental"],
|
||||
candidate["plainLyrics"],
|
||||
candidate["syncedLyrics"],
|
||||
cls.verify_synced_lyrics(
|
||||
target_duration, candidate["syncedLyrics"]
|
||||
),
|
||||
)
|
||||
|
||||
@cached_property
|
||||
|
|
@ -305,6 +331,7 @@ class LRCLyrics:
|
|||
return not self.synced, self.duration_dist
|
||||
|
||||
def get_text(self, want_synced: bool) -> str:
|
||||
"""Return the preferred text form for this candidate."""
|
||||
if self.instrumental:
|
||||
return INSTRUMENTAL_LYRICS
|
||||
|
||||
|
|
@ -351,7 +378,7 @@ class LRCLib(Backend):
|
|||
|
||||
def fetch(
|
||||
self, artist: str, title: str, album: str, length: int
|
||||
) -> tuple[str, str] | None:
|
||||
) -> Lyrics | None:
|
||||
"""Fetch lyrics text for the given song data."""
|
||||
evaluate_item = partial(LRCLyrics.make, target_duration=length)
|
||||
|
||||
|
|
@ -359,7 +386,9 @@ class LRCLib(Backend):
|
|||
candidates = [evaluate_item(item) for item in group]
|
||||
if item := self.pick_best_match(candidates):
|
||||
lyrics = item.get_text(self.config["synced"].get(bool))
|
||||
return lyrics, f"{self.GET_URL}/{item.id}"
|
||||
return Lyrics(
|
||||
lyrics, self.__class__.name, f"{self.GET_URL}/{item.id}"
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
|
@ -387,7 +416,7 @@ class MusiXmatch(Backend):
|
|||
def build_url(cls, *args: str) -> str:
|
||||
return cls.URL_TEMPLATE.format(*map(cls.encode, args))
|
||||
|
||||
def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None:
|
||||
def fetch(self, artist: str, title: str, *_) -> Lyrics | None:
|
||||
url = self.build_url(artist, title)
|
||||
|
||||
html = self.get_text(url)
|
||||
|
|
@ -409,7 +438,7 @@ class MusiXmatch(Backend):
|
|||
# sometimes there are non-existent lyrics with some content
|
||||
if "Lyrics | Musixmatch" in lyrics:
|
||||
return None
|
||||
return lyrics, url
|
||||
return Lyrics(lyrics, self.__class__.name, url)
|
||||
|
||||
|
||||
class Html:
|
||||
|
|
@ -512,13 +541,13 @@ class SearchBackend(SoupMixin, Backend):
|
|||
if check_match(candidate):
|
||||
yield candidate
|
||||
|
||||
def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None:
|
||||
def fetch(self, artist: str, title: str, *_) -> Lyrics | None:
|
||||
"""Fetch lyrics for the given artist and title."""
|
||||
for result in self.get_results(artist, title):
|
||||
if (html := self.get_text(result.url)) and (
|
||||
lyrics := self.scrape(html)
|
||||
):
|
||||
return lyrics, result.url
|
||||
return Lyrics(lyrics, self.__class__.name, result.url)
|
||||
|
||||
return None
|
||||
|
||||
|
|
@ -544,8 +573,16 @@ class Genius(SearchBackend):
|
|||
def headers(self) -> dict[str, str]:
|
||||
return {"Authorization": f"Bearer {self.config['genius_api_key']}"}
|
||||
|
||||
def get_json(self, *args, **kwargs) -> GeniusAPI.Search:
    """Return the decoded search payload or raise on an error payload.

    A payload without a ``"response"`` key is treated as an error: its
    ``"meta"`` message and status are raised as ``GeniusHTTPError``.
    """
    payload: GeniusAPI.Response = super().get_json(*args, **kwargs)
    if "response" not in payload:
        meta = payload["meta"]
        raise GeniusHTTPError(f"{meta['message']} Status: {meta['status']}")

    return payload  # type: ignore[return-value]
|
||||
|
||||
def search(self, artist: str, title: str) -> Iterable[SearchResult]:
|
||||
search_data: GeniusAPI.Search = self.get_json(
|
||||
search_data = self.get_json(
|
||||
self.SEARCH_URL,
|
||||
params={"q": f"{artist} {title}"},
|
||||
headers=self.headers,
|
||||
|
|
@ -730,12 +767,10 @@ class Google(SearchBackend):
|
|||
|
||||
@dataclass
|
||||
class Translator(LyricsRequestHandler):
|
||||
"""Translate lyrics text while preserving existing structured metadata."""
|
||||
|
||||
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
|
||||
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
|
||||
SEPARATOR = " | "
|
||||
remove_translations = staticmethod(
|
||||
partial(re.compile(r" / [^\n]+").sub, "")
|
||||
)
|
||||
|
||||
_log: Logger
|
||||
api_key: str
|
||||
|
|
@ -750,6 +785,7 @@ class Translator(LyricsRequestHandler):
|
|||
to_language: str,
|
||||
from_languages: list[str] | None = None,
|
||||
) -> Translator:
|
||||
"""Construct a translator with normalized language configuration."""
|
||||
return cls(
|
||||
log,
|
||||
api_key,
|
||||
|
|
@ -757,7 +793,7 @@ class Translator(LyricsRequestHandler):
|
|||
[x.upper() for x in from_languages or []],
|
||||
)
|
||||
|
||||
def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
|
||||
def get_translations(self, texts: Iterable[str]) -> list[str]:
|
||||
"""Return translations for the given texts.
|
||||
|
||||
To reduce the translation 'cost', we translate unique texts, and then
|
||||
|
|
@ -775,37 +811,9 @@ class Translator(LyricsRequestHandler):
|
|||
translated_text = data[0]["translations"][0]["text"]
|
||||
translations = translated_text.split(self.SEPARATOR)
|
||||
trans_by_text = dict(zip(unique_texts, translations))
|
||||
return list(zip(texts, (trans_by_text.get(t, "") for t in texts)))
|
||||
return [trans_by_text.get(t, "") for t in texts]
|
||||
|
||||
@classmethod
|
||||
def split_line(cls, line: str) -> tuple[str, str]:
|
||||
"""Split line to (timestamp, text)."""
|
||||
if m := cls.LINE_PARTS_RE.match(line):
|
||||
return m[1], m[2]
|
||||
|
||||
return "", ""
|
||||
|
||||
def append_translations(self, lines: Iterable[str]) -> list[str]:
|
||||
"""Append translations to the given lyrics texts.
|
||||
|
||||
Lines may contain timestamps from LRCLib which need to be temporarily
|
||||
removed for the translation. They can take any of these forms:
|
||||
- empty
|
||||
Text - text only
|
||||
[00:00:00] - timestamp only
|
||||
[00:00:00] Text - timestamp with text
|
||||
"""
|
||||
# split into [(timestamp, text), ...]]
|
||||
ts_and_text = list(map(self.split_line, lines))
|
||||
timestamps = [ts for ts, _ in ts_and_text]
|
||||
text_pairs = self.get_translations([ln for _, ln in ts_and_text])
|
||||
|
||||
# only add the separator for non-empty translations
|
||||
texts = [" / ".join(filter(None, p)) for p in text_pairs]
|
||||
# only add the space between non-empty timestamps and texts
|
||||
return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]
|
||||
|
||||
def translate(self, new_lyrics: str, old_lyrics: str) -> str:
|
||||
def translate(self, lyrics: Lyrics, old_lyrics: Lyrics) -> Lyrics:
|
||||
"""Translate the given lyrics to the target language.
|
||||
|
||||
Check old lyrics for existing translations and return them if their
|
||||
|
|
@ -814,38 +822,33 @@ class Translator(LyricsRequestHandler):
|
|||
|
||||
If the lyrics are already in the target language or not in any of
|
||||
the source languages (if configured), they are returned as is.
|
||||
|
||||
The footer with the source URL is preserved, if present.
|
||||
"""
|
||||
if (
|
||||
" / " in old_lyrics
|
||||
and self.remove_translations(old_lyrics) == new_lyrics
|
||||
):
|
||||
lyrics.original_text
|
||||
) == old_lyrics.original_text and old_lyrics.translated:
|
||||
self.info("🔵 Translations already exist")
|
||||
return old_lyrics
|
||||
|
||||
lyrics_language = langdetect.detect(new_lyrics).upper()
|
||||
if lyrics_language == self.to_language:
|
||||
if (lyrics_language := lyrics.language) == self.to_language:
|
||||
self.info(
|
||||
"🔵 Lyrics are already in the target language {.to_language}",
|
||||
self,
|
||||
)
|
||||
return new_lyrics
|
||||
|
||||
if self.from_languages and lyrics_language not in self.from_languages:
|
||||
elif (
|
||||
from_lang_config := self.from_languages
|
||||
) and lyrics_language not in from_lang_config:
|
||||
self.info(
|
||||
"🔵 Configuration {.from_languages} does not permit translating"
|
||||
" from {}",
|
||||
self,
|
||||
"🔵 Configuration {} does not permit translating from {}",
|
||||
from_lang_config,
|
||||
lyrics_language,
|
||||
)
|
||||
return new_lyrics
|
||||
else:
|
||||
with self.handle_request():
|
||||
lyrics.translations = self.get_translations(lyrics.text_lines)
|
||||
lyrics.translation_language = self.to_language
|
||||
self.info("🟢 Translated lyrics to {.to_language}", self)
|
||||
|
||||
lyrics, *url = new_lyrics.split("\n\nSource: ")
|
||||
with self.handle_request():
|
||||
translated_lines = self.append_translations(lyrics.splitlines())
|
||||
self.info("🟢 Translated lyrics to {.to_language}", self)
|
||||
return "\n\nSource: ".join(["\n".join(translated_lines), *url])
|
||||
return lyrics
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -939,21 +942,29 @@ class RestFiles:
|
|||
ui.print_(textwrap.dedent(text))
|
||||
|
||||
|
||||
BACKEND_BY_NAME = {
|
||||
b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch]
|
||||
}
|
||||
|
||||
|
||||
class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin):
|
||||
BACKEND_BY_NAME: ClassVar[dict[str, type[Backend]]] = {
|
||||
b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch]
|
||||
item_types: ClassVar[dict[str, types.Type]] = {
|
||||
"lyrics_url": types.STRING,
|
||||
"lyrics_backend": types.STRING,
|
||||
"lyrics_language": types.STRING,
|
||||
"lyrics_translation_language": types.STRING,
|
||||
}
|
||||
|
||||
@cached_property
|
||||
def backends(self) -> list[Backend]:
|
||||
user_sources = self.config["sources"].as_str_seq()
|
||||
|
||||
chosen = sanitize_choices(user_sources, self.BACKEND_BY_NAME)
|
||||
chosen = sanitize_choices(user_sources, BACKEND_BY_NAME)
|
||||
if "google" in chosen and not self.config["google_API_key"].get():
|
||||
self.warn("Disabling Google source: no API key configured.")
|
||||
chosen.remove("google")
|
||||
|
||||
return [self.BACKEND_BY_NAME[c](self.config, self._log) for c in chosen]
|
||||
return [BACKEND_BY_NAME[c](self.config, self._log) for c in chosen]
|
||||
|
||||
@cached_property
|
||||
def translator(self) -> Translator | None:
|
||||
|
|
@ -988,7 +999,7 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin):
|
|||
# currently block requests with the beets user agent.
|
||||
"sources": [
|
||||
n
|
||||
for n in self.BACKEND_BY_NAME
|
||||
for n in BACKEND_BY_NAME
|
||||
if n not in {"musixmatch", "tekstowo"}
|
||||
],
|
||||
}
|
||||
|
|
@ -1057,18 +1068,16 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin):
|
|||
for item in task.imported_items():
|
||||
self.add_item_lyrics(item, False)
|
||||
|
||||
def find_lyrics(self, item: Item) -> str:
|
||||
def find_lyrics(self, item: Item) -> Lyrics | None:
|
||||
"""Return the first lyrics match from the configured source search."""
|
||||
album, length = item.album, round(item.length)
|
||||
matches = (
|
||||
[
|
||||
lyrics
|
||||
for t in titles
|
||||
if (lyrics := self.get_lyrics(a, t, album, length))
|
||||
]
|
||||
self.get_lyrics(a, t, album, length)
|
||||
for a, titles in search_pairs(item)
|
||||
for t in titles
|
||||
)
|
||||
|
||||
return "\n\n---\n\n".join(next(filter(None, matches), []))
|
||||
return next(filter(None, matches), None)
|
||||
|
||||
def add_item_lyrics(self, item: Item, write: bool) -> None:
|
||||
"""Fetch and store lyrics for a single item. If ``write``, then the
|
||||
|
|
@ -1081,29 +1090,44 @@ class LyricsPlugin(LyricsRequestHandler, plugins.BeetsPlugin):
|
|||
self.info("🔵 Lyrics already present: {}", item)
|
||||
return
|
||||
|
||||
if lyrics := self.find_lyrics(item):
|
||||
existing_lyrics = Lyrics.from_item(item)
|
||||
if new_lyrics := self.find_lyrics(item):
|
||||
self.info("🟢 Found lyrics: {}", item)
|
||||
if translator := self.translator:
|
||||
lyrics = translator.translate(lyrics, item.lyrics)
|
||||
new_lyrics = translator.translate(new_lyrics, existing_lyrics)
|
||||
|
||||
synced_mode = self.config["synced"].get(bool)
|
||||
if synced_mode and existing_lyrics.synced and not new_lyrics.synced:
|
||||
self.info(
|
||||
"🔴 Not updating synced lyrics with non-synced ones: {}",
|
||||
item,
|
||||
)
|
||||
return
|
||||
|
||||
for key in ("backend", "url", "language", "translation_language"):
|
||||
item_key = f"lyrics_{key}"
|
||||
if value := getattr(new_lyrics, key):
|
||||
item[item_key] = value
|
||||
elif item_key in item:
|
||||
del item[item_key]
|
||||
|
||||
lyrics_text = new_lyrics.full_text
|
||||
else:
|
||||
self.info("🔴 Lyrics not found: {}", item)
|
||||
lyrics = self.config["fallback"].get()
|
||||
lyrics_text = self.config["fallback"].get()
|
||||
|
||||
if lyrics not in {None, item.lyrics}:
|
||||
item.lyrics = lyrics
|
||||
if lyrics_text not in {None, item.lyrics}:
|
||||
item.lyrics = lyrics_text
|
||||
item.store()
|
||||
if write:
|
||||
item.try_write()
|
||||
item.store()
|
||||
|
||||
def get_lyrics(self, artist: str, title: str, *args) -> str | None:
|
||||
"""Fetch lyrics, trying each source in turn. Return a string or
|
||||
None if no lyrics were found.
|
||||
"""
|
||||
def get_lyrics(self, artist: str, title: str, *args) -> Lyrics | None:
|
||||
"""Get first found lyrics, trying each source in turn."""
|
||||
self.info("Fetching lyrics for {} - {}", artist, title)
|
||||
for backend in self.backends:
|
||||
with backend.handle_request():
|
||||
if lyrics_info := backend.fetch(artist, title, *args):
|
||||
lyrics, url = lyrics_info
|
||||
return f"{lyrics}\n\nSource: {url}"
|
||||
return lyrics_info
|
||||
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ Transactions
|
|||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
Migration
|
||||
Transaction
|
||||
|
||||
Queries
|
||||
|
|
|
|||
|
|
@ -38,6 +38,14 @@ New features
|
|||
3. Comma followed by a space
|
||||
4. Slash wrapped by spaces
|
||||
|
||||
- :doc:`plugins/lyrics`: With ``synced`` enabled, existing synced lyrics are no
|
||||
longer replaced by newly fetched plain lyrics, even when ``force`` is enabled.
|
||||
- :doc:`plugins/lyrics`: Remove ``Source: <lyrics-url>`` suffix from lyrics.
|
||||
Store the backend name in ``lyrics_backend``, URL in ``lyrics_url``, language
|
||||
in ``lyrics_language`` and translation language (if a translation is present) in
|
||||
``lyrics_translation_language`` flexible attributes. Lyrics are automatically
|
||||
migrated on the first beets run. :bug:`6370`
|
||||
|
||||
Bug fixes
|
||||
~~~~~~~~~
|
||||
|
||||
|
|
@ -84,6 +92,9 @@ Other changes
|
|||
Since genres are now stored as a list in the ``genres`` field and written to
|
||||
files as individual genre tags, this option has no effect and has been
|
||||
removed.
|
||||
- :doc:`plugins/lyrics`: To cut down noise from the ``lrclib`` lyrics source,
|
||||
synced lyrics are now checked to ensure the final timestamp falls within the
|
||||
track's duration.
|
||||
|
||||
2.6.2 (February 22, 2026)
|
||||
-------------------------
|
||||
|
|
|
|||
|
|
@ -166,6 +166,53 @@ rolling back the transaction if an error occurs.
|
|||
|
||||
.. _blog post: https://beets.io/blog/sqlite-nightmare.html
|
||||
|
||||
Migrations
|
||||
~~~~~~~~~~
|
||||
|
||||
The database layer includes a first-class migration system for data changes that
|
||||
must happen alongside schema evolution. This keeps compatibility work explicit,
|
||||
testable, and isolated from normal query and model code.
|
||||
|
||||
Each database subclass declares its migrations in ``_migrations`` as pairs of a
|
||||
migration class and the model classes it applies to. During startup, the
|
||||
database creates required tables and columns first, then executes configured
|
||||
migrations.
|
||||
|
||||
Migration completion is tracked in a dedicated ``migrations`` table keyed by
|
||||
migration name and table name. This means each migration runs at most once per
|
||||
table, so large one-time data rewrites can be safely coordinated across
|
||||
restarts.
|
||||
|
||||
The migration name is derived from the migration class name. Because that name
|
||||
is the persisted identity in the ``migrations`` table, renaming a released
|
||||
migration class changes its identity and can cause the migration to run again.
|
||||
Treat migration class names as stable once shipped.
|
||||
|
||||
For example, ``MultiGenreFieldMigration`` becomes ``multi_genre_field``. After
|
||||
it runs for the ``items`` table, beets records a row equivalent to:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
name = "multi_genre_field", table_name = "items"
|
||||
|
||||
Common use cases include:
|
||||
|
||||
1. Backfilling a newly introduced canonical field from older data.
|
||||
2. Normalizing legacy free-form values into a structured representation.
|
||||
3. Splitting mixed-content legacy fields into cleaned primary content plus
|
||||
auxiliary metadata stored as flexible attributes.
|
||||
|
||||
To add a migration:
|
||||
|
||||
1. Create a :class:`beets.dbcore.db.Migration` subclass.
|
||||
2. Implement the table-specific data rewrite logic in ``_migrate_data``.
|
||||
3. Register the migration in the database subclass ``_migrations`` list for the
|
||||
target models.
|
||||
|
||||
In practice, migrations should be idempotent and conservative: gate behavior on
|
||||
the current schema when needed, keep writes transactional, and batch large
|
||||
updates so startup remains predictable for real libraries.
|
||||
|
||||
Queries
|
||||
-------
|
||||
|
||||
|
|
|
|||
|
|
@ -25,9 +25,17 @@ Fetch Lyrics During Import
|
|||
--------------------------
|
||||
|
||||
When importing new files, beets will now fetch lyrics for files that don't
|
||||
already have them. The lyrics will be stored in the beets database. If the
|
||||
``import.write`` config option is on, then the lyrics will also be written to
|
||||
the files' tags.
|
||||
already have them. The lyrics will be stored in the beets database. The plugin
|
||||
also sets a few useful flexible attributes:
|
||||
|
||||
- ``lyrics_backend``: name of the backend that provided the lyrics
|
||||
- ``lyrics_url``: URL of the page where the lyrics were found
|
||||
- ``lyrics_language``: original language of the lyrics
|
||||
- ``lyrics_translation_language``: language of the lyrics translation (if
|
||||
translation is enabled)
|
||||
|
||||
If the ``import.write`` config option is on, then the lyrics will also be
|
||||
written to the files' tags.
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
|
@ -83,7 +91,9 @@ The available options are:
|
|||
deactivated if no ``google_API_key`` is setup. By default, ``musixmatch`` and
|
||||
``tekstowo`` are excluded because they block the beets User-Agent.
|
||||
- **synced**: Prefer synced lyrics over plain lyrics if a source offers them.
|
||||
Currently ``lrclib`` is the only source that provides them.
|
||||
Currently ``lrclib`` is the only source that provides them. With this option enabled,
|
||||
existing synced lyrics are not replaced by newly fetched plain lyrics (even
|
||||
when ``force`` is enabled). To allow that replacement, disable ``synced``.
|
||||
|
||||
.. _beets custom search engine: https://www.google.com:443/cse/publicurl?cx=009217259823014548361:lndtuqkycfu
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
import textwrap
|
||||
|
||||
import pytest
|
||||
|
||||
from beets.dbcore import types
|
||||
from beets.library.migrations import MultiGenreFieldMigration
|
||||
from beets.library import migrations
|
||||
from beets.library.models import Album, Item
|
||||
from beets.test.helper import TestHelper
|
||||
|
||||
|
|
@ -30,13 +32,13 @@ class TestMultiGenreFieldMigration:
|
|||
# and now configure the migrations to be tested
|
||||
monkeypatch.setattr(
|
||||
"beets.library.library.Library._migrations",
|
||||
((MultiGenreFieldMigration, (Item, Album)),),
|
||||
((migrations.MultiGenreFieldMigration, (Item, Album)),),
|
||||
)
|
||||
yield helper
|
||||
|
||||
helper.teardown_beets()
|
||||
|
||||
def test_migrates_only_rows_with_missing_genres(self, helper: TestHelper):
|
||||
def test_migrate(self, helper: TestHelper):
|
||||
helper.config["lastgenre"]["separator"] = " - "
|
||||
|
||||
expected_item_genres = []
|
||||
|
|
@ -70,3 +72,63 @@ class TestMultiGenreFieldMigration:
|
|||
del helper.lib.db_tables
|
||||
assert helper.lib.migration_exists("multi_genre_field", "items")
|
||||
assert helper.lib.migration_exists("multi_genre_field", "albums")
|
||||
|
||||
|
||||
class TestLyricsMetadataInFlexFieldsMigration:
|
||||
@pytest.fixture
|
||||
def helper(self, monkeypatch):
|
||||
# do not apply migrations upon library initialization
|
||||
monkeypatch.setattr("beets.library.library.Library._migrations", ())
|
||||
|
||||
helper = TestHelper()
|
||||
helper.setup_beets()
|
||||
|
||||
# and now configure the migrations to be tested
|
||||
monkeypatch.setattr(
|
||||
"beets.library.library.Library._migrations",
|
||||
((migrations.LyricsMetadataInFlexFieldsMigration, (Item,)),),
|
||||
)
|
||||
yield helper
|
||||
|
||||
helper.teardown_beets()
|
||||
|
||||
def test_migrate(self, helper: TestHelper):
|
||||
lyrics_item = helper.add_item(
|
||||
lyrics=textwrap.dedent("""
|
||||
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
|
||||
[00:00.50]
|
||||
[00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées
|
||||
|
||||
Source: https://lrclib.net/api/1/""")
|
||||
)
|
||||
instrumental_lyrics_item = helper.add_item(lyrics="[Instrumental]")
|
||||
|
||||
helper.lib._migrate()
|
||||
|
||||
lyrics_item.load()
|
||||
|
||||
assert lyrics_item.lyrics == textwrap.dedent(
|
||||
"""
|
||||
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
|
||||
[00:00.50]
|
||||
[00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées"""
|
||||
)
|
||||
assert lyrics_item.lyrics_backend == "lrclib"
|
||||
assert lyrics_item.lyrics_url == "https://lrclib.net/api/1/"
|
||||
assert lyrics_item.lyrics_language == "EN"
|
||||
assert lyrics_item.lyrics_translation_language == "FR"
|
||||
|
||||
with pytest.raises(AttributeError):
|
||||
instrumental_lyrics_item.lyrics_backend
|
||||
with pytest.raises(AttributeError):
|
||||
instrumental_lyrics_item.lyrics_url
|
||||
with pytest.raises(AttributeError):
|
||||
instrumental_lyrics_item.lyrics_language
|
||||
with pytest.raises(AttributeError):
|
||||
instrumental_lyrics_item.lyrics_translation_language
|
||||
|
||||
# remove cached initial db tables data
|
||||
del helper.lib.db_tables
|
||||
assert helper.lib.migration_exists(
|
||||
"lyrics_metadata_in_flex_fields", "items"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ class LyricsPage(NamedTuple):
|
|||
lyrics: str
|
||||
artist: str = "The Beatles"
|
||||
track_title: str = "Lady Madonna"
|
||||
language: str = "EN"
|
||||
url_title: str | None = None # only relevant to the Google backend
|
||||
marks: list[str] = [] # markers for pytest.param # noqa: RUF012
|
||||
|
||||
|
|
@ -127,21 +128,20 @@ lyrics_pages = [
|
|||
""",
|
||||
artist="Atlanta",
|
||||
track_title="Mergaitės Nori Mylėt",
|
||||
language="LT",
|
||||
url_title="Mergaitės nori mylėt – Atlanta | Dainų Žodžiai",
|
||||
marks=[xfail_on_ci("Expired SSL certificate")],
|
||||
),
|
||||
LyricsPage.make(
|
||||
"https://genius.com/The-beatles-lady-madonna-lyrics",
|
||||
"""
|
||||
[Intro: Instrumental]
|
||||
|
||||
[Verse 1: Paul McCartney]
|
||||
Lady Madonna, children at your feet
|
||||
Wonder how you manage to make ends meet
|
||||
Who finds the money when you pay the rent?
|
||||
Did you think that money was heaven sent?
|
||||
|
||||
[Bridge: Paul McCartney]
|
||||
[Bridge: Paul McCartney, Paul McCartney, John Lennon & George Harrison]
|
||||
Friday night arrives without a suitcase
|
||||
Sunday morning creeping like a nun
|
||||
Monday's child has learned to tie his bootlace
|
||||
|
|
@ -150,27 +150,28 @@ lyrics_pages = [
|
|||
[Verse 2: Paul McCartney]
|
||||
Lady Madonna, baby at your breast
|
||||
Wonders how you manage to feed the rest
|
||||
|
||||
[Bridge: Paul McCartney, John Lennon & George Harrison]
|
||||
[Tenor Saxophone Solo: Ronnie Scott]
|
||||
|
||||
[Bridge: John Lennon & George Harrison, Paul McCartney, John Lennon & George Harrison]
|
||||
Pa-pa-pa-pa, pa-pa-pa-pa-pa
|
||||
Pa-pa-pa-pa-pa, pa-pa-pa, pa-pa, pa-pa
|
||||
Pa-pa-pa-pa, pa-pa-pa-pa-pa
|
||||
See how they run
|
||||
|
||||
[Verse 3: Paul McCartney]
|
||||
Lady Madonna, lying on the bed
|
||||
Listen to the music playing in your head
|
||||
|
||||
[Bridge: Paul McCartney]
|
||||
Tuesday afternoon is never ending
|
||||
Wednesday morning papers didn't come
|
||||
Thursday night your stockings needed mending
|
||||
[Bridge: Paul McCartney, John Lennon & George Harrison, Paul McCartney, John Lennon & George Harrison]
|
||||
Tuesday afternoon is never ending (Pa-pa-pa-pa, pa-pa-pa-pa-pa)
|
||||
Wednesday morning, papers didn't come (Pa-pa-pa-pa-pa, pa-pa-pa, pa-pa, pa-pa)
|
||||
Thursday night, your stockings needed mending (Pa-pa-pa-pa, pa-pa-pa-pa-pa)
|
||||
See how they run
|
||||
|
||||
[Verse 4: Paul McCartney]
|
||||
Lady Madonna, children at your feet
|
||||
Wonder how you manage to make ends meet
|
||||
|
||||
[Outro: Instrumental]
|
||||
""",
|
||||
""", # noqa: E501
|
||||
marks=[xfail_on_ci("Genius returns 403 FORBIDDEN in CI")],
|
||||
),
|
||||
LyricsPage.make(
|
||||
|
|
@ -222,6 +223,7 @@ lyrics_pages = [
|
|||
Je me demande comment vous vous débrouillez pour joindre les deux bouts
|
||||
""",
|
||||
url_title="Paroles et traduction The Beatles : Lady Madonna - paroles de chanson", # noqa: E501
|
||||
language="FR",
|
||||
),
|
||||
LyricsPage.make(
|
||||
# note that this URL needs to be followed with a slash, otherwise it
|
||||
|
|
@ -269,7 +271,7 @@ lyrics_pages = [
|
|||
url_title="Lady Madonna - The Beatles - LETRAS.MUS.BR",
|
||||
),
|
||||
LyricsPage.make(
|
||||
"https://lrclib.net/api/get/14038",
|
||||
"https://lrclib.net/api/get/19648857",
|
||||
"""
|
||||
[00:08.35] Lady Madonna, children at your feet
|
||||
[00:12.85] Wonder how you manage to make ends meet
|
||||
|
|
|
|||
|
|
@ -23,9 +23,11 @@ from http import HTTPStatus
|
|||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from beets.library import Item
|
||||
from beets.test.helper import PluginMixin, TestHelper
|
||||
from beets.util.lyrics import Lyrics
|
||||
from beetsplug import lyrics
|
||||
|
||||
from .lyrics_pages import lyrics_pages
|
||||
|
|
@ -249,26 +251,117 @@ class TestLyricsPlugin(LyricsPluginMixin):
|
|||
assert re.search(expected_log_match, last_log, re.I)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"plugin_config, found, expected",
|
||||
"plugin_config, old_lyrics, found, expected",
|
||||
[
|
||||
({}, "new", "old"),
|
||||
({"force": True}, "new", "new"),
|
||||
({"force": True, "local": True}, "new", "old"),
|
||||
({"force": True, "fallback": None}, "", "old"),
|
||||
({"force": True, "fallback": ""}, "", ""),
|
||||
({"force": True, "fallback": "default"}, "", "default"),
|
||||
pytest.param(
|
||||
{},
|
||||
"old",
|
||||
"new",
|
||||
"old",
|
||||
id="no_force_keeps_old",
|
||||
),
|
||||
pytest.param(
|
||||
{"force": True},
|
||||
"old",
|
||||
"new",
|
||||
"new",
|
||||
id="force_overwrites_with_new",
|
||||
),
|
||||
pytest.param(
|
||||
{"force": True, "local": True},
|
||||
"old",
|
||||
"new",
|
||||
"old",
|
||||
id="force_local_keeps_old",
|
||||
),
|
||||
pytest.param(
|
||||
{"force": True, "fallback": None},
|
||||
"old",
|
||||
None,
|
||||
"old",
|
||||
id="force_fallback_none_keeps_old",
|
||||
),
|
||||
pytest.param(
|
||||
{"force": True, "fallback": ""},
|
||||
"old",
|
||||
None,
|
||||
"",
|
||||
id="force_fallback_empty_uses_empty",
|
||||
),
|
||||
pytest.param(
|
||||
{"force": True, "fallback": "default"},
|
||||
"old",
|
||||
None,
|
||||
"default",
|
||||
id="force_fallback_default_uses_default",
|
||||
),
|
||||
pytest.param(
|
||||
{"force": True, "synced": True},
|
||||
"[00:00.00] old synced",
|
||||
"new plain",
|
||||
"[00:00.00] old synced",
|
||||
id="keep-existing-synced-lyrics",
|
||||
),
|
||||
pytest.param(
|
||||
{"force": True, "synced": True},
|
||||
"[00:00.00] old synced",
|
||||
"[00:00.00] new synced",
|
||||
"[00:00.00] new synced",
|
||||
id="replace-with-new-synced-lyrics",
|
||||
),
|
||||
pytest.param(
|
||||
{"force": True, "synced": False},
|
||||
"[00:00.00] old synced",
|
||||
"new plain",
|
||||
"new plain",
|
||||
id="replace-with-unsynced-lyrics-when-disabled",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_overwrite_config(
|
||||
self, monkeypatch, helper, lyrics_plugin, found, expected
|
||||
self,
|
||||
monkeypatch,
|
||||
helper,
|
||||
lyrics_plugin,
|
||||
old_lyrics,
|
||||
found,
|
||||
expected,
|
||||
):
|
||||
monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: found)
|
||||
item = helper.create_item(id=1, lyrics="old")
|
||||
monkeypatch.setattr(
|
||||
lyrics_plugin,
|
||||
"find_lyrics",
|
||||
lambda _: Lyrics(found) if found is not None else None,
|
||||
)
|
||||
item = helper.create_item(id=1, lyrics=old_lyrics)
|
||||
|
||||
lyrics_plugin.add_item_lyrics(item, False)
|
||||
|
||||
assert item.lyrics == expected
|
||||
|
||||
def test_set_additional_lyrics_info(
|
||||
self, monkeypatch, helper, lyrics_plugin
|
||||
):
|
||||
lyrics = Lyrics(
|
||||
"sing in the rain every hour of the day",
|
||||
"lrclib",
|
||||
url="https://lrclib.net/api/1",
|
||||
)
|
||||
monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: lyrics)
|
||||
item = helper.add_item(
|
||||
id=1, lyrics="", lyrics_translation_language="EN"
|
||||
)
|
||||
|
||||
lyrics_plugin.add_item_lyrics(item, False)
|
||||
|
||||
item = helper.lib.get_item(item.id)
|
||||
|
||||
assert item.lyrics_url == lyrics.url
|
||||
assert item.lyrics_backend == lyrics.backend
|
||||
assert item.lyrics_language == "EN"
|
||||
# make sure translation language is cleared
|
||||
with pytest.raises(AttributeError):
|
||||
item.lyrics_translation_language
|
||||
|
||||
|
||||
class LyricsBackendTest(LyricsPluginMixin):
|
||||
@pytest.fixture
|
||||
|
|
@ -316,21 +409,29 @@ class TestLyricsSources(LyricsBackendTest):
|
|||
}
|
||||
requests_mock.get(lyrics.Google.SEARCH_URL, json=data)
|
||||
|
||||
def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage):
|
||||
def test_backend_source(
|
||||
self, monkeypatch, lyrics_plugin, lyrics_page: LyricsPage
|
||||
):
|
||||
"""Test parsed lyrics from each of the configured lyrics pages."""
|
||||
lyrics_info = lyrics_plugin.find_lyrics(
|
||||
monkeypatch.setattr(
|
||||
"beetsplug.lyrics.LyricsRequestHandler.create_session",
|
||||
lambda _: requests.Session(),
|
||||
)
|
||||
|
||||
assert lyrics_plugin.find_lyrics(
|
||||
Item(
|
||||
artist=lyrics_page.artist,
|
||||
title=lyrics_page.track_title,
|
||||
album="",
|
||||
length=186.0,
|
||||
)
|
||||
) == Lyrics(
|
||||
lyrics_page.lyrics,
|
||||
lyrics_page.backend,
|
||||
url=lyrics_page.url,
|
||||
language=lyrics_page.language,
|
||||
)
|
||||
|
||||
assert lyrics_info
|
||||
lyrics, _ = lyrics_info.split("\n\nSource: ")
|
||||
assert lyrics == lyrics_page.lyrics
|
||||
|
||||
|
||||
class TestGoogleLyrics(LyricsBackendTest):
|
||||
"""Test scraping heuristics on a fake html page."""
|
||||
|
|
@ -448,7 +549,7 @@ def lyrics_match(**overrides):
|
|||
"id": 1,
|
||||
"instrumental": False,
|
||||
"duration": LYRICS_DURATION,
|
||||
"syncedLyrics": "synced",
|
||||
"syncedLyrics": "[00:00.00] synced",
|
||||
"plainLyrics": "plain",
|
||||
**overrides,
|
||||
}
|
||||
|
|
@ -456,6 +557,7 @@ def lyrics_match(**overrides):
|
|||
|
||||
class TestLRCLibLyrics(LyricsBackendTest):
|
||||
ITEM_DURATION = 999
|
||||
SYNCED = "[00:00.00] synced"
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
def backend_name(self):
|
||||
|
|
@ -471,12 +573,19 @@ class TestLRCLibLyrics(LyricsBackendTest):
|
|||
@pytest.mark.parametrize("response_data", [[lyrics_match()]])
|
||||
@pytest.mark.parametrize(
|
||||
"plugin_config, expected_lyrics",
|
||||
[({"synced": True}, "synced"), ({"synced": False}, "plain")],
|
||||
[
|
||||
pytest.param({"synced": True}, SYNCED, id="pick-synced"),
|
||||
pytest.param({"synced": False}, "plain", id="pick-plain"),
|
||||
],
|
||||
)
|
||||
def test_synced_config_option(self, fetch_lyrics, expected_lyrics):
|
||||
lyrics, _ = fetch_lyrics()
|
||||
def test_synced_config_option(
|
||||
self, backend_name, fetch_lyrics, expected_lyrics
|
||||
):
|
||||
lyrics = fetch_lyrics()
|
||||
|
||||
assert lyrics == expected_lyrics
|
||||
assert lyrics
|
||||
assert lyrics.text == expected_lyrics
|
||||
assert lyrics.backend == backend_name
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"response_data, expected_lyrics",
|
||||
|
|
@ -484,7 +593,7 @@ class TestLRCLibLyrics(LyricsBackendTest):
|
|||
pytest.param([], None, id="handle non-matching lyrics"),
|
||||
pytest.param(
|
||||
[lyrics_match()],
|
||||
"synced",
|
||||
SYNCED,
|
||||
id="synced when available",
|
||||
),
|
||||
pytest.param(
|
||||
|
|
@ -509,9 +618,9 @@ class TestLRCLibLyrics(LyricsBackendTest):
|
|||
syncedLyrics=None,
|
||||
plainLyrics="plain with closer duration",
|
||||
),
|
||||
lyrics_match(syncedLyrics="synced", plainLyrics="plain 2"),
|
||||
lyrics_match(syncedLyrics=SYNCED, plainLyrics="plain 2"),
|
||||
],
|
||||
"synced",
|
||||
SYNCED,
|
||||
id="prefer synced lyrics even if plain duration is closer",
|
||||
),
|
||||
pytest.param(
|
||||
|
|
@ -529,22 +638,30 @@ class TestLRCLibLyrics(LyricsBackendTest):
|
|||
"valid plain",
|
||||
id="ignore synced with invalid duration",
|
||||
),
|
||||
pytest.param(
|
||||
[
|
||||
lyrics_match(
|
||||
duration=59, syncedLyrics="[01:00.00] invalid synced"
|
||||
)
|
||||
],
|
||||
None,
|
||||
id="ignore synced with a timestamp longer than duration",
|
||||
),
|
||||
pytest.param(
|
||||
[lyrics_match(syncedLyrics=None), lyrics_match()],
|
||||
"synced",
|
||||
SYNCED,
|
||||
id="prefer match with synced lyrics",
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("plugin_config", [{"synced": True}])
|
||||
def test_fetch_lyrics(self, fetch_lyrics, expected_lyrics):
|
||||
lyrics_info = fetch_lyrics()
|
||||
if lyrics_info is None:
|
||||
assert expected_lyrics is None
|
||||
lyrics = fetch_lyrics()
|
||||
if expected_lyrics is None:
|
||||
assert not lyrics
|
||||
else:
|
||||
lyrics, _ = fetch_lyrics()
|
||||
|
||||
assert lyrics == expected_lyrics
|
||||
assert lyrics
|
||||
assert lyrics.text == expected_lyrics
|
||||
|
||||
|
||||
class TestTranslation:
|
||||
|
|
@ -557,6 +674,7 @@ class TestTranslation:
|
|||
" | [Refrain : Doja Cat]"
|
||||
" | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501
|
||||
" | Mon corps ne me laissait pas le cacher (Cachez-le)"
|
||||
" | [Chorus]"
|
||||
" | Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501
|
||||
" | Chevauchant à travers le tonnerre, la foudre"
|
||||
)
|
||||
|
|
@ -590,13 +708,15 @@ class TestTranslation:
|
|||
[Refrain: Doja Cat]
|
||||
Hard for me to let you go (Let you go, let you go)
|
||||
My body wouldn't let me hide it (Hide it)
|
||||
[Chorus]
|
||||
No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold)
|
||||
Ridin' through the thunder, lightnin'""",
|
||||
"",
|
||||
Lyrics(""),
|
||||
"""
|
||||
[Refrain: Doja Cat] / [Refrain : Doja Cat]
|
||||
Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)
|
||||
My body wouldn't let me hide it (Hide it) / Mon corps ne me laissait pas le cacher (Cachez-le)
|
||||
[Chorus]
|
||||
No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) / Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)
|
||||
Ridin' through the thunder, lightnin' / Chevauchant à travers le tonnerre, la foudre""", # noqa: E501
|
||||
id="plain",
|
||||
|
|
@ -604,28 +724,29 @@ class TestTranslation:
|
|||
pytest.param(
|
||||
"""
|
||||
[00:00.00] Some synced lyrics
|
||||
[00:00:50]
|
||||
[00:00.50]
|
||||
[00:01.00] Some more synced lyrics
|
||||
|
||||
Source: https://lrclib.net/api/123""",
|
||||
"",
|
||||
""",
|
||||
Lyrics(""),
|
||||
"""
|
||||
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
|
||||
[00:00:50]
|
||||
[00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées
|
||||
|
||||
Source: https://lrclib.net/api/123""",
|
||||
[00:00.50]
|
||||
[00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées""", # noqa: E501
|
||||
id="synced",
|
||||
),
|
||||
pytest.param(
|
||||
"Quelques paroles",
|
||||
"",
|
||||
Lyrics(""),
|
||||
"Quelques paroles",
|
||||
id="already in the target language",
|
||||
),
|
||||
pytest.param(
|
||||
"Some lyrics",
|
||||
"Some lyrics / Some translation",
|
||||
Lyrics(
|
||||
"Some lyrics / Some translation",
|
||||
language="EN",
|
||||
translation_language="FR",
|
||||
),
|
||||
"Some lyrics / Some translation",
|
||||
id="already translated",
|
||||
),
|
||||
|
|
@ -636,8 +757,8 @@ class TestTranslation:
|
|||
bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"])
|
||||
|
||||
assert bing.translate(
|
||||
textwrap.dedent(new_lyrics), old_lyrics
|
||||
) == textwrap.dedent(expected)
|
||||
Lyrics(textwrap.dedent(new_lyrics)), old_lyrics
|
||||
).full_text == textwrap.dedent(expected)
|
||||
|
||||
|
||||
class TestRestFiles:
|
||||
|
|
|
|||
37
test/util/test_lyrics.py
Normal file
37
test/util/test_lyrics.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import textwrap
|
||||
|
||||
from beets.util.lyrics import Lyrics
|
||||
|
||||
|
||||
class TestLyrics:
|
||||
def test_instrumental_lyrics(self):
|
||||
lyrics = Lyrics(
|
||||
"[Instrumental]", "lrclib", url="https://lrclib.net/api/1"
|
||||
)
|
||||
|
||||
assert lyrics.full_text == "[Instrumental]"
|
||||
assert lyrics.backend == "lrclib"
|
||||
assert lyrics.url == "https://lrclib.net/api/1"
|
||||
assert lyrics.language is None
|
||||
assert lyrics.translation_language is None
|
||||
|
||||
def test_from_legacy_text(self):
|
||||
text = textwrap.dedent("""
|
||||
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
|
||||
[00:00.50]
|
||||
[00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées
|
||||
|
||||
Source: https://lrclib.net/api/1/""")
|
||||
|
||||
lyrics = Lyrics.from_legacy_text(text)
|
||||
|
||||
assert lyrics.full_text == textwrap.dedent(
|
||||
"""
|
||||
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
|
||||
[00:00.50]
|
||||
[00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées"""
|
||||
)
|
||||
assert lyrics.backend == "lrclib"
|
||||
assert lyrics.url == "https://lrclib.net/api/1/"
|
||||
assert lyrics.language == "EN"
|
||||
assert lyrics.translation_language == "FR"
|
||||
Loading…
Reference in a new issue