mirror of
https://github.com/beetbox/beets.git
synced 2026-01-01 05:23:05 +01:00
Lyrics: Resurrect translations and refactor ReST and command line handling (#5485)
🎵 The Refactoring Blues 🎵 by [Claude](https://claude.ai) Verse 1: Got those lyrics plugin blues Cleaning up some messy code Moving classes, fixing views Making changes down the road Chorus: We're refactoring tonight Making the codebase clean and bright Translation's got a brand new home And ReST files found their own Verse 2: Added Microsoft Translate Keeping tokens safe and sound Config options up-to-date Better structure all around Bridge: Path operations simplified Groups of artists, neat and tied Error handling's looking fine Comments clear along each line Verse 3: RestFiles in their own class now Cleaning imports, showing how Better typing makes it clear What should go and what stays here Final Chorus: We're refactoring tonight Making the codebase clean and bright Translation's got a brand new home And our code can stand alone! — inspired by _the diff_ --- ### Technical Changes - Replaced deprecated and broken Bing translations by Microsoft Translator API - Isolated all functionality in the `Translator` class. - Updated translation settings configuration. - Added support for synced lyrics from LRCLib. - Added support for preserving existing translations to help users to manage their characters quota. - Added error handling and logging - Added tests - Created RestFiles class for ReST document handling - Simplified path operations using pathlib - Added tests - Improved command line options handling #### Caching of translations The plugin will not re-translate lyrics if translations already exist, see ```fish $ beet -v lyrics albumartist::Sel karta -f ... lyrics: LyricsPlugin: Fetching lyrics for Sel - Saulės Miestas lyrics: LRCLib: Fetching JSON from https://lrclib.net/api/get lyrics: LyricsPlugin: 🟢 Found lyrics: 32275 | 1996 / Neįvertinta Karta: Sel - Saulės Miestas lyrics: Translator: Posting data to https://api.cognitive.microsofttranslator.com/translate lyrics: Translator: 🟢 Translated lyrics to EN $ beet -v lyrics albumartist::Sel karta -f ... 
lyrics: LyricsPlugin: Fetching lyrics for Sel - Saulės Miestas lyrics: LRCLib: Fetching JSON from https://lrclib.net/api/get lyrics: LyricsPlugin: 🟢 Found lyrics: 32275 | 1996 / Neįvertinta Karta: Sel - Saulės Miestas lyrics: Translator: 🔵 Translations already exist ```
This commit is contained in:
commit
fe28957358
6 changed files with 530 additions and 247 deletions
|
|
@ -113,3 +113,23 @@ class GoogleCustomSearchAPI:
|
|||
"""Pagemap data with a single meta tags dict in a list."""
|
||||
|
||||
metatags: list[JSONDict]
|
||||
|
||||
|
||||
class TranslatorAPI:
    """Namespace grouping the typed shapes of translator API response data."""

    class Language(TypedDict):
        """Language data returned by the translator API."""

        language: str
        score: float

    class Translation(TypedDict):
        """Translation data returned by the translator API."""

        # The translated text.
        text: str
        # presumably the language code the text was translated to -- TODO confirm
        to: str

    class Response(TypedDict):
        """Response from the translator API."""

        detectedLanguage: TranslatorAPI.Language
        translations: list[TranslatorAPI.Translation]
|
||||
|
|
|
|||
|
|
@ -17,16 +17,17 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
import errno
|
||||
import itertools
|
||||
import math
|
||||
import os.path
|
||||
import re
|
||||
import textwrap
|
||||
from contextlib import contextmanager, suppress
|
||||
from dataclasses import dataclass
|
||||
from functools import cached_property, partial, total_ordering
|
||||
from html import unescape
|
||||
from http import HTTPStatus
|
||||
from itertools import groupby
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Iterable, Iterator, NamedTuple
|
||||
from urllib.parse import quote, quote_plus, urlencode, urlparse
|
||||
|
||||
|
|
@ -40,49 +41,22 @@ from beets import plugins, ui
|
|||
from beets.autotag.hooks import string_dist
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beets.importer import ImportTask
|
||||
from beets.library import Item
|
||||
from logging import Logger
|
||||
|
||||
from ._typing import GeniusAPI, GoogleCustomSearchAPI, JSONDict, LRCLibAPI
|
||||
from beets.importer import ImportTask
|
||||
from beets.library import Item, Library
|
||||
|
||||
from ._typing import (
|
||||
GeniusAPI,
|
||||
GoogleCustomSearchAPI,
|
||||
JSONDict,
|
||||
LRCLibAPI,
|
||||
TranslatorAPI,
|
||||
)
|
||||
|
||||
USER_AGENT = f"beets/{beets.__version__}"
|
||||
INSTRUMENTAL_LYRICS = "[Instrumental]"
|
||||
|
||||
# The content for the base index.rst generated in ReST mode.
|
||||
REST_INDEX_TEMPLATE = """Lyrics
|
||||
======
|
||||
|
||||
* :ref:`Song index <genindex>`
|
||||
* :ref:`search`
|
||||
|
||||
Artist index:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
artists/*
|
||||
"""
|
||||
|
||||
# The content for the base conf.py generated.
|
||||
REST_CONF_TEMPLATE = """# -*- coding: utf-8 -*-
|
||||
master_doc = 'index'
|
||||
project = 'Lyrics'
|
||||
copyright = 'none'
|
||||
author = 'Various Authors'
|
||||
latex_documents = [
|
||||
(master_doc, 'Lyrics.tex', project,
|
||||
author, 'manual'),
|
||||
]
|
||||
epub_title = project
|
||||
epub_author = author
|
||||
epub_publisher = author
|
||||
epub_copyright = copyright
|
||||
epub_exclude_files = ['search.html']
|
||||
epub_tocdepth = 1
|
||||
epub_tocdup = False
|
||||
"""
|
||||
|
||||
|
||||
class NotFoundError(requests.exceptions.HTTPError):
|
||||
pass
|
||||
|
|
@ -252,6 +226,12 @@ class RequestHandler:
|
|||
self.debug("Fetching JSON from {}", url)
|
||||
return r_session.get(url, **kwargs).json()
|
||||
|
||||
def post_json(self, url: str, params: JSONDict | None = None, **kwargs):
    """POST to the given URL and return the decoded JSON response.

    ``params`` are merged into the URL by ``format_url``; every other
    keyword argument is forwarded unchanged to the session's ``post`` call.
    """
    target = self.format_url(url, params)
    self.debug("Posting JSON to {}", target)
    response = r_session.post(target, **kwargs)
    return response.json()
|
||||
|
||||
@contextmanager
|
||||
def handle_request(self) -> Iterator[None]:
|
||||
try:
|
||||
|
|
@ -760,6 +740,214 @@ class Google(SearchBackend):
|
|||
return None
|
||||
|
||||
|
||||
@dataclass
class Translator(RequestHandler):
    """Translate lyrics and append the translation to every line.

    Each translated line has the form ``<original> / <translation>``; an
    optional leading timestamp is kept in front of the original text.
    """

    TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
    # Splits a line into an optional leading "[mm:ss.xx]" timestamp and text.
    LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
    # Joins the unique texts into a single string for one translation request.
    SEPARATOR = " | "
    # Strips the " / <translation>" suffix from every line. Wrapped in
    # staticmethod because functools.partial objects used as class attributes
    # emit a FutureWarning on Python 3.13 and are bound like methods from
    # Python 3.14 on, which would pass ``self`` as the text to substitute.
    remove_translations = staticmethod(
        partial(re.compile(r" / [^\n]+").sub, "")
    )

    _log: Logger
    api_key: str
    to_language: str
    from_languages: list[str]

    @classmethod
    def from_config(
        cls,
        log: Logger,
        api_key: str,
        to_language: str,
        from_languages: list[str] | None = None,
    ) -> Translator:
        """Build a translator from configuration values.

        Language codes are upper-cased so that they can be compared with
        the upper-cased language detected in ``translate``.
        """
        return cls(
            log,
            api_key,
            to_language.upper(),
            [x.upper() for x in from_languages or []],
        )

    def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
        """Return (text, translation) pairs for the given texts.

        To reduce the translation 'cost', we translate unique texts, and then
        map the translations back to the original texts. Texts without a
        translation are paired with an empty string.
        """
        # Materialize once: the texts are iterated several times below, which
        # would silently yield nothing for a one-shot iterator.
        texts = list(texts)
        unique_texts = list(dict.fromkeys(texts))
        if not any(unique_texts):
            # Nothing to translate, so do not spend a request on it.
            return [(t, "") for t in texts]

        text = self.SEPARATOR.join(unique_texts)
        data: list[TranslatorAPI.Response] = self.post_json(
            self.TRANSLATE_URL,
            headers={"Ocp-Apim-Subscription-Key": self.api_key},
            json=[{"text": text}],
            params={"api-version": "3.0", "to": self.to_language},
        )

        translated_text = data[0]["translations"][0]["text"]
        translations = translated_text.split(self.SEPARATOR)
        # zip stops at the shorter sequence, so texts left without a
        # counterpart fall back to "" in the lookup below.
        trans_by_text = dict(zip(unique_texts, translations))
        return [(t, trans_by_text.get(t, "")) for t in texts]

    @classmethod
    def split_line(cls, line: str) -> tuple[str, str]:
        """Split line to (timestamp, text)."""
        if m := cls.LINE_PARTS_RE.match(line):
            return m[1], m[2]

        return "", ""

    def append_translations(self, lines: Iterable[str]) -> list[str]:
        """Append translations to the given lyrics texts.

        Lines may contain timestamps from LRCLib which need to be temporarily
        removed for the translation. They can take any of these forms:
            - empty
            Text - text only
            [00:00.00] - timestamp only
            [00:00.00] Text - timestamp with text
        """
        # split into [(timestamp, text), ...]
        ts_and_text = list(map(self.split_line, lines))
        timestamps = [ts for ts, _ in ts_and_text]
        text_pairs = self.get_translations([ln for _, ln in ts_and_text])

        # only add the separator for non-empty translations
        texts = [" / ".join(filter(None, p)) for p in text_pairs]
        # only add the space between non-empty timestamps and texts
        return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]

    def translate(self, new_lyrics: str, old_lyrics: str) -> str:
        """Translate the given lyrics to the target language.

        Check old lyrics for existing translations and return them if their
        original text matches the new lyrics. This is to avoid translating
        the same lyrics multiple times.

        If the lyrics are already in the target language or not in any of
        the source languages (if configured), they are returned as is.

        The footer with the source URL is preserved, if present.
        """
        if (
            " / " in old_lyrics
            and self.remove_translations(old_lyrics) == new_lyrics
        ):
            self.info("🔵 Translations already exist")
            return old_lyrics

        lyrics_language = langdetect.detect(new_lyrics).upper()
        if lyrics_language == self.to_language:
            self.info(
                "🔵 Lyrics are already in the target language {}",
                self.to_language,
            )
            return new_lyrics

        if self.from_languages and lyrics_language not in self.from_languages:
            self.info(
                "🔵 Configuration {} does not permit translating from {}",
                self.from_languages,
                lyrics_language,
            )
            return new_lyrics

        lyrics, *url = new_lyrics.split("\n\nSource: ")
        with self.handle_request():
            translated_lines = self.append_translations(lyrics.splitlines())
            self.info("🟢 Translated lyrics to {}", self.to_language)
            return "\n\nSource: ".join(["\n".join(translated_lines), *url])

        # NOTE(review): reached only if handle_request suppresses an error
        # raised by the request (its body is not fully visible here); in that
        # case keep the untranslated lyrics rather than returning None.
        return new_lyrics
|
||||
|
||||
|
||||
@dataclass
class RestFiles:
    """Write lyrics as reStructuredText files that Sphinx can build.

    Files are created below ``directory``: ``index.rst`` and ``conf.py`` at
    the top level and one ``artists/<slug>.rst`` file per album artist.
    """

    # The content for the base index.rst generated in ReST mode.
    REST_INDEX_TEMPLATE = textwrap.dedent("""
        Lyrics
        ======

        * :ref:`Song index <genindex>`
        * :ref:`search`

        Artist index:

        .. toctree::
           :maxdepth: 1
           :glob:

           artists/*
        """).strip()

    # The content for the base conf.py generated.
    REST_CONF_TEMPLATE = textwrap.dedent("""
        master_doc = "index"
        project = "Lyrics"
        copyright = "none"
        author = "Various Authors"
        latex_documents = [
            (master_doc, "Lyrics.tex", project, author, "manual"),
        ]
        epub_exclude_files = ["search.html"]
        epub_tocdepth = 1
        epub_tocdup = False
        """).strip()

    directory: Path

    @cached_property
    def artists_dir(self) -> Path:
        """Return the ``artists`` sub-directory, creating it if necessary."""
        artists_dir = self.directory / "artists"
        artists_dir.mkdir(parents=True, exist_ok=True)
        return artists_dir

    def write_indexes(self) -> None:
        """Write conf.py and index.rst files necessary for Sphinx

        We write minimal configurations that are necessary for Sphinx
        to operate. We do not overwrite existing files so that
        customizations are respected."""
        index_file = self.directory / "index.rst"
        if not index_file.exists():
            index_file.write_text(self.REST_INDEX_TEMPLATE, encoding="utf-8")
        conf_file = self.directory / "conf.py"
        if not conf_file.exists():
            conf_file.write_text(self.REST_CONF_TEMPLATE, encoding="utf-8")

    def write_artist(self, artist: str, items: Iterable[Item]) -> None:
        """Write the ReST file with the lyrics of a single artist.

        ``items`` are grouped by album in the order given, so items of the
        same album need to be adjacent.
        """
        parts = [
            f'{artist}\n{"=" * len(artist)}',
            ".. contents::\n :local:",
        ]
        for album, album_items in groupby(items, key=lambda i: i.album):
            parts.append(f'{album}\n{"-" * len(album)}')
            for item in album_items:
                title = f":index:`{item.title.strip()}`"
                parts.append(f'{title}\n{"~" * len(title)}')
                parts.append(textwrap.indent(item.lyrics, "| "))
        file = self.artists_dir / f"{slug(artist)}.rst"
        # Lyrics regularly contain non-ASCII characters: always write UTF-8
        # instead of depending on the locale's preferred encoding.
        file.write_text("\n\n".join(parts).strip(), encoding="utf-8")

    def write(self, items: list[Item]) -> None:
        """Write the index files and one ReST file per album artist.

        A short note with example ``sphinx-build`` commands is printed
        afterwards. The given list is not modified.
        """
        self.directory.mkdir(exist_ok=True, parents=True)
        self.write_indexes()

        # groupby only merges adjacent items, so order by artist and then by
        # album. The sort is stable: the order of tracks within an album is
        # kept as given.
        ordered = sorted(items, key=lambda i: (i.albumartist, i.album))
        for artist, artist_items in groupby(
            ordered, key=lambda i: i.albumartist
        ):
            self.write_artist(artist.strip(), artist_items)

        d = self.directory
        text = f"""
        ReST files generated. to build, use one of:
            sphinx-build -b html {d} {d/"html"}
            sphinx-build -b epub {d} {d/"epub"}
            sphinx-build -b latex {d} {d/"latex"} && make -C {d/"latex"} all-pdf
        """
        ui.print_(textwrap.dedent(text))
|
||||
|
||||
|
||||
class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
|
||||
BACKEND_BY_NAME = {
|
||||
b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch]
|
||||
|
|
@ -776,15 +964,23 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
|
|||
|
||||
return [self.BACKEND_BY_NAME[c](self.config, self._log) for c in chosen]
|
||||
|
||||
@cached_property
def translator(self) -> Translator | None:
    """Return the configured translator, or None when it is not set up.

    A translator is created only if both the ``api_key`` and the
    ``to_language`` options of the ``translate`` section have a value.
    """
    translate_config = self.config["translate"]
    has_api_key = translate_config["api_key"].get()
    if not (has_api_key and translate_config["to_language"].get()):
        return None

    return Translator.from_config(self._log, **translate_config.flatten())
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.import_stages = [self.imported]
|
||||
self.config.add(
|
||||
{
|
||||
"auto": True,
|
||||
"bing_client_secret": None,
|
||||
"bing_lang_from": [],
|
||||
"bing_lang_to": None,
|
||||
"translate": {
|
||||
"api_key": None,
|
||||
"from_languages": [],
|
||||
"to_language": None,
|
||||
},
|
||||
"dist_thresh": 0.11,
|
||||
"google_API_key": None,
|
||||
"google_engine_ID": "009217259823014548361:lndtuqkycfu",
|
||||
|
|
@ -795,6 +991,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
|
|||
"fallback": None,
|
||||
"force": False,
|
||||
"local": False,
|
||||
"print": False,
|
||||
"synced": False,
|
||||
# Musixmatch is disabled by default as they are currently blocking
|
||||
# requests with the beets user agent.
|
||||
|
|
@ -803,52 +1000,27 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
|
|||
],
|
||||
}
|
||||
)
|
||||
self.config["bing_client_secret"].redact = True
|
||||
self.config["translate"]["api_key"].redact = True
|
||||
self.config["google_API_key"].redact = True
|
||||
self.config["google_engine_ID"].redact = True
|
||||
self.config["genius_api_key"].redact = True
|
||||
|
||||
# State information for the ReST writer.
|
||||
# First, the current artist we're writing.
|
||||
self.artist = "Unknown artist"
|
||||
# The current album: False means no album yet.
|
||||
self.album = False
|
||||
# The current rest file content. None means the file is not
|
||||
# open yet.
|
||||
self.rest = None
|
||||
|
||||
self.config["bing_lang_from"] = [
|
||||
x.lower() for x in self.config["bing_lang_from"].as_str_seq()
|
||||
]
|
||||
|
||||
@cached_property
|
||||
def bing_access_token(self) -> str | None:
|
||||
params = {
|
||||
"client_id": "beets",
|
||||
"client_secret": self.config["bing_client_secret"],
|
||||
"scope": "https://api.microsofttranslator.com",
|
||||
"grant_type": "client_credentials",
|
||||
}
|
||||
|
||||
oauth_url = "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13"
|
||||
with self.handle_request():
|
||||
r = r_session.post(oauth_url, params=params)
|
||||
return r.json()["access_token"]
|
||||
if self.config["auto"]:
|
||||
self.import_stages = [self.imported]
|
||||
|
||||
def commands(self):
|
||||
cmd = ui.Subcommand("lyrics", help="fetch song lyrics")
|
||||
cmd.parser.add_option(
|
||||
"-p",
|
||||
"--print",
|
||||
dest="printlyr",
|
||||
action="store_true",
|
||||
default=False,
|
||||
default=self.config["print"].get(),
|
||||
help="print lyrics to console",
|
||||
)
|
||||
cmd.parser.add_option(
|
||||
"-r",
|
||||
"--write-rest",
|
||||
dest="writerest",
|
||||
dest="rest_directory",
|
||||
action="store",
|
||||
default=None,
|
||||
metavar="dir",
|
||||
|
|
@ -857,154 +1029,69 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
|
|||
cmd.parser.add_option(
|
||||
"-f",
|
||||
"--force",
|
||||
dest="force_refetch",
|
||||
action="store_true",
|
||||
default=False,
|
||||
default=self.config["force"].get(),
|
||||
help="always re-download lyrics",
|
||||
)
|
||||
cmd.parser.add_option(
|
||||
"-l",
|
||||
"--local",
|
||||
dest="local_only",
|
||||
action="store_true",
|
||||
default=False,
|
||||
default=self.config["local"].get(),
|
||||
help="do not fetch missing lyrics",
|
||||
)
|
||||
|
||||
def func(lib, opts, args):
|
||||
def func(lib: Library, opts, args) -> None:
|
||||
# The "write to files" option corresponds to the
|
||||
# import_write config value.
|
||||
write = ui.should_write()
|
||||
if opts.writerest:
|
||||
self.writerest_indexes(opts.writerest)
|
||||
items = lib.items(ui.decargs(args))
|
||||
self.config.set(vars(opts))
|
||||
items = list(lib.items(args))
|
||||
for item in items:
|
||||
if not opts.local_only and not self.config["local"]:
|
||||
self.fetch_item_lyrics(
|
||||
item, write, opts.force_refetch or self.config["force"]
|
||||
)
|
||||
if item.lyrics:
|
||||
if opts.printlyr:
|
||||
ui.print_(item.lyrics)
|
||||
if opts.writerest:
|
||||
self.appendrest(opts.writerest, item)
|
||||
if opts.writerest and items:
|
||||
# flush last artist & write to ReST
|
||||
self.writerest(opts.writerest)
|
||||
ui.print_("ReST files generated. to build, use one of:")
|
||||
ui.print_(
|
||||
" sphinx-build -b html %s _build/html" % opts.writerest
|
||||
)
|
||||
ui.print_(
|
||||
" sphinx-build -b epub %s _build/epub" % opts.writerest
|
||||
)
|
||||
ui.print_(
|
||||
(
|
||||
" sphinx-build -b latex %s _build/latex "
|
||||
"&& make -C _build/latex all-pdf"
|
||||
)
|
||||
% opts.writerest
|
||||
)
|
||||
self.add_item_lyrics(item, ui.should_write())
|
||||
if item.lyrics and opts.print:
|
||||
ui.print_(item.lyrics)
|
||||
|
||||
if opts.rest_directory and (
|
||||
items := [i for i in items if i.lyrics]
|
||||
):
|
||||
RestFiles(Path(opts.rest_directory)).write(items)
|
||||
|
||||
cmd.func = func
|
||||
return [cmd]
|
||||
|
||||
def appendrest(self, directory, item):
|
||||
"""Append the item to an ReST file
|
||||
|
||||
This will keep state (in the `rest` variable) in order to avoid
|
||||
writing continuously to the same files.
|
||||
"""
|
||||
|
||||
if slug(self.artist) != slug(item.albumartist):
|
||||
# Write current file and start a new one ~ item.albumartist
|
||||
self.writerest(directory)
|
||||
self.artist = item.albumartist.strip()
|
||||
self.rest = "%s\n%s\n\n.. contents::\n :local:\n\n" % (
|
||||
self.artist,
|
||||
"=" * len(self.artist),
|
||||
)
|
||||
|
||||
if self.album != item.album:
|
||||
tmpalbum = self.album = item.album.strip()
|
||||
if self.album == "":
|
||||
tmpalbum = "Unknown album"
|
||||
self.rest += "{}\n{}\n\n".format(tmpalbum, "-" * len(tmpalbum))
|
||||
title_str = ":index:`%s`" % item.title.strip()
|
||||
block = "| " + item.lyrics.replace("\n", "\n| ")
|
||||
self.rest += "{}\n{}\n\n{}\n\n".format(
|
||||
title_str, "~" * len(title_str), block
|
||||
)
|
||||
|
||||
def writerest(self, directory):
|
||||
"""Write self.rest to a ReST file"""
|
||||
if self.rest is not None and self.artist is not None:
|
||||
path = os.path.join(
|
||||
directory, "artists", slug(self.artist) + ".rst"
|
||||
)
|
||||
with open(path, "wb") as output:
|
||||
output.write(self.rest.encode("utf-8"))
|
||||
|
||||
def writerest_indexes(self, directory):
|
||||
"""Write conf.py and index.rst files necessary for Sphinx
|
||||
|
||||
We write minimal configurations that are necessary for Sphinx
|
||||
to operate. We do not overwrite existing files so that
|
||||
customizations are respected."""
|
||||
try:
|
||||
os.makedirs(os.path.join(directory, "artists"))
|
||||
except OSError as e:
|
||||
if e.errno == errno.EEXIST:
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
indexfile = os.path.join(directory, "index.rst")
|
||||
if not os.path.exists(indexfile):
|
||||
with open(indexfile, "w") as output:
|
||||
output.write(REST_INDEX_TEMPLATE)
|
||||
conffile = os.path.join(directory, "conf.py")
|
||||
if not os.path.exists(conffile):
|
||||
with open(conffile, "w") as output:
|
||||
output.write(REST_CONF_TEMPLATE)
|
||||
|
||||
def imported(self, _, task: ImportTask) -> None:
|
||||
"""Import hook for fetching lyrics automatically."""
|
||||
if self.config["auto"]:
|
||||
for item in task.imported_items():
|
||||
self.fetch_item_lyrics(item, False, self.config["force"])
|
||||
for item in task.imported_items():
|
||||
self.add_item_lyrics(item, False)
|
||||
|
||||
def fetch_item_lyrics(self, item: Item, write: bool, force: bool) -> None:
|
||||
def find_lyrics(self, item: Item) -> str:
    """Return lyrics found for the item, or an empty string.

    The artist/titles pairs given by ``search_pairs`` are tried in order.
    The first pair for which at least one title has lyrics wins, and all
    lyrics found for that pair are joined with a ``---`` divider.
    """
    album = item.album
    length = round(item.length)
    for artist, titles in search_pairs(item):
        found = []
        for title in titles:
            lyrics = self.get_lyrics(artist, title, album, length)
            if lyrics:
                found.append(lyrics)
        if found:
            return "\n\n---\n\n".join(found)

    return ""
|
||||
|
||||
def add_item_lyrics(self, item: Item, write: bool) -> None:
|
||||
"""Fetch and store lyrics for a single item. If ``write``, then the
|
||||
lyrics will also be written to the file itself.
|
||||
"""
|
||||
# Skip if the item already has lyrics.
|
||||
if not force and item.lyrics:
|
||||
if self.config["local"]:
|
||||
return
|
||||
|
||||
if not self.config["force"] and item.lyrics:
|
||||
self.info("🔵 Lyrics already present: {}", item)
|
||||
return
|
||||
|
||||
lyrics_matches = []
|
||||
album, length = item.album, round(item.length)
|
||||
for artist, titles in search_pairs(item):
|
||||
lyrics_matches = [
|
||||
self.get_lyrics(artist, title, album, length)
|
||||
for title in titles
|
||||
]
|
||||
if any(lyrics_matches):
|
||||
break
|
||||
|
||||
lyrics = "\n\n---\n\n".join(filter(None, lyrics_matches))
|
||||
|
||||
if lyrics:
|
||||
if lyrics := self.find_lyrics(item):
|
||||
self.info("🟢 Found lyrics: {0}", item)
|
||||
if self.config["bing_client_secret"].get():
|
||||
lang_from = langdetect.detect(lyrics)
|
||||
if self.config["bing_lang_to"].get() != lang_from and (
|
||||
not self.config["bing_lang_from"]
|
||||
or (lang_from in self.config["bing_lang_from"].as_str_seq())
|
||||
):
|
||||
lyrics = self.append_translation(
|
||||
lyrics, self.config["bing_lang_to"]
|
||||
)
|
||||
if translator := self.translator:
|
||||
lyrics = translator.translate(lyrics, item.lyrics)
|
||||
else:
|
||||
self.info("🔴 Lyrics not found: {}", item)
|
||||
lyrics = self.config["fallback"].get()
|
||||
|
|
@ -1027,30 +1114,3 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
|
|||
return f"{lyrics}\n\nSource: {url}"
|
||||
|
||||
return None
|
||||
|
||||
def append_translation(self, text, to_lang):
|
||||
from xml.etree import ElementTree
|
||||
|
||||
if not (token := self.bing_access_token):
|
||||
self.warn(
|
||||
"Could not get Bing Translate API access token. "
|
||||
"Check your 'bing_client_secret' password."
|
||||
)
|
||||
return text
|
||||
|
||||
# Extract unique lines to limit API request size per song
|
||||
lines = text.split("\n")
|
||||
unique_lines = set(lines)
|
||||
url = "https://api.microsofttranslator.com/v2/Http.svc/Translate"
|
||||
with self.handle_request():
|
||||
text = self.fetch_text(
|
||||
url,
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
params={"text": "|".join(unique_lines), "to": to_lang},
|
||||
)
|
||||
if translated := ElementTree.fromstring(text.encode("utf-8")).text:
|
||||
# Use a translation mapping dict to build resulting lyrics
|
||||
translations = dict(zip(unique_lines, translated.split("|")))
|
||||
return "".join(f"{ln} / {translations[ln]}\n" for ln in lines)
|
||||
|
||||
return text
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ New features:
|
|||
control the maximum allowed distance between the lyrics search result and the
|
||||
tagged item's artist and title. This is useful for preventing false positives
|
||||
when fetching lyrics.
|
||||
* :doc:`plugins/lyrics`: Rewrite lyrics translation functionality to use Azure
|
||||
AI Translator API and add relevant instructions to the documentation.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
|
|
|
|||
|
|
@ -38,26 +38,30 @@ Default configuration:
|
|||
|
||||
lyrics:
|
||||
auto: yes
|
||||
bing_client_secret: null
|
||||
bing_lang_from: []
|
||||
bing_lang_to: null
|
||||
translate:
|
||||
api_key:
|
||||
from_languages: []
|
||||
to_language:
|
||||
dist_thresh: 0.11
|
||||
fallback: null
|
||||
force: no
|
||||
google_API_key: null
|
||||
google_engine_ID: 009217259823014548361:lndtuqkycfu
|
||||
print: no
|
||||
sources: [lrclib, google, genius, tekstowo]
|
||||
synced: no
|
||||
|
||||
The available options are:
|
||||
|
||||
- **auto**: Fetch lyrics automatically during import.
|
||||
- **bing_client_secret**: Your Bing Translation application password
|
||||
(see :ref:`lyrics-translation`)
|
||||
- **bing_lang_from**: By default all lyrics with a language other than
|
||||
``bing_lang_to`` are translated. Use a list of lang codes to restrict the set
|
||||
of source languages to translate.
|
||||
- **bing_lang_to**: Language to translate lyrics into.
|
||||
- **translate**:
|
||||
|
||||
- **api_key**: API key to access your Azure Translator resource. (see
|
||||
:ref:`lyrics-translation`)
|
||||
- **from_languages**: By default all lyrics with a language other than
|
||||
``to_language`` are translated. Use a list of language codes to restrict
|
||||
them.
|
||||
- **to_language**: Language code to translate lyrics to.
|
||||
- **dist_thresh**: The maximum distance between the artist and title
|
||||
combination of the music file and lyrics candidate to consider them a match.
|
||||
Lower values will make the plugin more strict, higher values will make it
|
||||
|
|
@ -72,6 +76,7 @@ The available options are:
|
|||
- **google_engine_ID**: The custom search engine to use.
|
||||
Default: The `beets custom search engine`_, which gathers an updated list of
|
||||
sources known to be scrapeable.
|
||||
- **print**: Print lyrics to the console.
|
||||
- **sources**: List of sources to search for lyrics. An asterisk ``*`` expands
|
||||
to all available sources. The ``google`` source will be automatically
|
||||
deactivated if no ``google_API_key`` is setup.
|
||||
|
|
@ -104,9 +109,8 @@ Rendering Lyrics into Other Formats
|
|||
-----------------------------------
|
||||
|
||||
The ``-r directory, --write-rest directory`` option renders all lyrics as
|
||||
`reStructuredText`_ (ReST) documents in ``directory`` (by default, the current
|
||||
directory). That directory, in turn, can be parsed by tools like `Sphinx`_ to
|
||||
generate HTML, ePUB, or PDF documents.
|
||||
`reStructuredText`_ (ReST) documents in ``directory``. That directory, in turn,
|
||||
can be parsed by tools like `Sphinx`_ to generate HTML, ePUB, or PDF documents.
|
||||
|
||||
Minimal ``conf.py`` and ``index.rst`` files are created the first time the
|
||||
command is run. They are not overwritten on subsequent runs, so you can safely
|
||||
|
|
@ -119,19 +123,19 @@ Sphinx supports various `builders`_, see a few suggestions:
|
|||
|
||||
::
|
||||
|
||||
sphinx-build -b html . _build/html
|
||||
sphinx-build -b html <dir> <dir>/html
|
||||
|
||||
.. admonition:: Build an ePUB3 formatted file, usable on ebook readers
|
||||
|
||||
::
|
||||
|
||||
sphinx-build -b epub3 . _build/epub
|
||||
sphinx-build -b epub3 <dir> <dir>/epub
|
||||
|
||||
.. admonition:: Build a PDF file, which incidentally also builds a LaTeX file
|
||||
|
||||
::
|
||||
|
||||
sphinx-build -b latex %s _build/latex && make -C _build/latex all-pdf
|
||||
sphinx-build -b latex <dir> <dir>/latex && make -C <dir>/latex all-pdf
|
||||
|
||||
|
||||
.. _Sphinx: https://www.sphinx-doc.org/
|
||||
|
|
@ -165,10 +169,28 @@ After that, the lyrics plugin will fall back on other declared data sources.
|
|||
Activate On-the-Fly Translation
|
||||
-------------------------------
|
||||
|
||||
You need to register for a Microsoft Azure Marketplace free account and
|
||||
to the `Microsoft Translator API`_. Follow the four steps process, specifically
|
||||
at step 3 enter ``beets`` as *Client ID* and copy/paste the generated
|
||||
*Client secret* into your ``bing_client_secret`` configuration, alongside
|
||||
``bing_lang_to`` target ``language code``.
|
||||
We use Azure to optionally translate your lyrics. To set up the integration,
|
||||
follow these steps:
|
||||
|
||||
.. _Microsoft Translator API: https://docs.microsoft.com/en-us/azure/cognitive-services/translator/translator-how-to-signup
|
||||
1. `Create a Translator resource`_ on Azure.
|
||||
2. `Obtain its API key`_.
|
||||
3. Add the API key to your configuration as ``translate.api_key``.
|
||||
4. Configure your target language using the ``translate.to_language`` option.
|
||||
|
||||
|
||||
For example, with the following configuration
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
lyrics:
|
||||
translate:
|
||||
api_key: YOUR_TRANSLATOR_API_KEY
|
||||
to_language: de
|
||||
|
||||
You should expect lyrics like this::
|
||||
|
||||
Original verse / Ursprünglicher Vers
|
||||
Some other verse / Ein anderer Vers
|
||||
|
||||
.. _create a Translator resource: https://learn.microsoft.com/en-us/azure/ai-services/translator/create-translator-resource
|
||||
.. _obtain its API key: https://learn.microsoft.com/en-us/python/api/overview/azure/ai-translation-text-readme?view=azure-python&preserve-view=true#get-an-api-key
|
||||
|
|
|
|||
|
|
@ -15,7 +15,9 @@ markers =
|
|||
data_file = .reports/coverage/data
|
||||
branch = true
|
||||
relative_files = true
|
||||
omit = beets/test/*
|
||||
omit =
|
||||
beets/test/*
|
||||
beetsplug/_typing.py
|
||||
|
||||
[coverage:report]
|
||||
precision = 2
|
||||
|
|
|
|||
|
|
@ -17,13 +17,15 @@
|
|||
import importlib.util
|
||||
import os
|
||||
import re
|
||||
import textwrap
|
||||
from functools import partial
|
||||
from http import HTTPStatus
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from beets.library import Item
|
||||
from beets.test.helper import PluginMixin
|
||||
from beets.test.helper import PluginMixin, TestHelper
|
||||
from beetsplug import lyrics
|
||||
|
||||
from .lyrics_pages import LyricsPage, lyrics_pages
|
||||
|
|
@ -40,6 +42,14 @@ PHRASE_BY_TITLE = {
|
|||
}
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def helper():
    """Provide a beets test helper that is set up once per test module."""
    test_helper = TestHelper()
    test_helper.setup_beets()
    yield test_helper
    test_helper.teardown_beets()
|
||||
|
||||
|
||||
class TestLyricsUtils:
|
||||
@pytest.mark.parametrize(
|
||||
"artist, title",
|
||||
|
|
@ -238,6 +248,27 @@ class TestLyricsPlugin(LyricsPluginMixin):
|
|||
assert last_log
|
||||
assert re.search(expected_log_match, last_log, re.I)
|
||||
|
||||
@pytest.mark.parametrize(
    "plugin_config, found, expected",
    [
        # Without "force", the lyrics already on the item are kept.
        ({}, "new", "old"),
        # With "force", the found lyrics replace the existing ones.
        ({"force": True}, "new", "new"),
        # "local" wins over "force": the existing lyrics are kept.
        ({"force": True, "local": True}, "new", "old"),
        # Nothing found: the outcome depends on the "fallback" option.
        ({"force": True, "fallback": None}, "", "old"),
        ({"force": True, "fallback": ""}, "", ""),
        ({"force": True, "fallback": "default"}, "", "default"),
    ],
)
def test_overwrite_config(
    self, monkeypatch, helper, lyrics_plugin, found, expected
):
    """Check which lyrics an item ends up with for each configuration.

    ``plugin_config`` is not requested directly by this test; presumably it
    parametrizes the fixture behind ``lyrics_plugin`` -- TODO confirm.
    """
    # Replace the lookup so that no backend is queried.
    monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: found)
    item = helper.create_item(id=1, lyrics="old")

    lyrics_plugin.add_item_lyrics(item, False)

    assert item.lyrics == expected
|
||||
|
||||
|
||||
class LyricsBackendTest(LyricsPluginMixin):
|
||||
@pytest.fixture
|
||||
|
|
@ -287,8 +318,13 @@ class TestLyricsSources(LyricsBackendTest):
|
|||
|
||||
def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage):
|
||||
"""Test parsed lyrics from each of the configured lyrics pages."""
|
||||
lyrics_info = lyrics_plugin.get_lyrics(
|
||||
lyrics_page.artist, lyrics_page.track_title, "", 186
|
||||
lyrics_info = lyrics_plugin.find_lyrics(
|
||||
Item(
|
||||
artist=lyrics_page.artist,
|
||||
title=lyrics_page.track_title,
|
||||
album="",
|
||||
length=186.0,
|
||||
)
|
||||
)
|
||||
|
||||
assert lyrics_info
|
||||
|
|
@ -509,3 +545,144 @@ class TestLRCLibLyrics(LyricsBackendTest):
|
|||
lyrics, _ = fetch_lyrics()
|
||||
|
||||
assert lyrics == expected_lyrics
|
||||
|
||||
|
||||
class TestTranslation:
    """Tests for ``lyrics.Translator`` against a mocked translation endpoint."""

    @pytest.fixture(autouse=True)
    def _patch_bing(self, requests_mock):
        """Answer POSTs to ``Translator.TRANSLATE_URL`` with canned French text."""

        def pick_translation(body):
            # The canned text is selected by a marker found in the request body.
            if b"Refrain" in body:
                return (
                    ""
                    " | [Refrain : Doja Cat]"
                    " | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)"  # noqa: E501
                    " | Mon corps ne me laissait pas le cacher (Cachez-le)"
                    " | Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)"  # noqa: E501
                    " | Chevauchant à travers le tonnerre, la foudre"
                )
            if b"00:00.00" in body:
                return (
                    ""
                    " | [00:00.00] Quelques paroles synchronisées"
                    " | [00:01.00] Quelques paroles plus synchronisées"
                )
            return (
                ""
                " | Quelques paroles synchronisées"
                " | Quelques paroles plus synchronisées"
            )

        def reply(request, _context):
            translated = pick_translation(request.body)
            return [
                {
                    "detectedLanguage": {"language": "en", "score": 1.0},
                    "translations": [{"text": translated, "to": "fr"}],
                }
            ]

        requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=reply)

    @pytest.mark.parametrize(
        "new_lyrics, old_lyrics, expected",
        [
            pytest.param(
                """
                [Refrain: Doja Cat]
                Hard for me to let you go (Let you go, let you go)
                My body wouldn't let me hide it (Hide it)
                No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold)
                Ridin' through the thunder, lightnin'""",
                "",
                """
                [Refrain: Doja Cat] / [Refrain : Doja Cat]
                Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)
                My body wouldn't let me hide it (Hide it) / Mon corps ne me laissait pas le cacher (Cachez-le)
                No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) / Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)
                Ridin' through the thunder, lightnin' / Chevauchant à travers le tonnerre, la foudre""",  # noqa: E501
                id="plain",
            ),
            pytest.param(
                """
                [00:00.00] Some synced lyrics
                [00:00:50]
                [00:01.00] Some more synced lyrics

                Source: https://lrclib.net/api/123""",
                "",
                """
                [00:00.00] Some synced lyrics / Quelques paroles synchronisées
                [00:00:50]
                [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées

                Source: https://lrclib.net/api/123""",  # noqa: E501
                id="synced",
            ),
            pytest.param(
                "Quelques paroles",
                "",
                "Quelques paroles",
                id="already in the target language",
            ),
            pytest.param(
                "Some lyrics",
                "Some lyrics / Some translation",
                "Some lyrics / Some translation",
                id="already translated",
            ),
        ],
    )
    def test_translate(self, new_lyrics, old_lyrics, expected):
        """Compare ``Translator.translate`` output with the expected lyrics.

        ``new_lyrics`` and ``expected`` are dedented before use so that the
        multi-line cases can be written indented alongside the code.
        """
        plugin = lyrics.LyricsPlugin()
        translator = lyrics.Translator(plugin._log, "123", "FR", ["EN"])

        result = translator.translate(textwrap.dedent(new_lyrics), old_lyrics)

        assert result == textwrap.dedent(expected)
|
||||
|
||||
|
||||
class TestRestFiles:
    """Tests for writing items out as ReST files with ``lyrics.RestFiles``."""

    @pytest.fixture
    def rest_dir(self, tmp_path):
        """Return the directory given to ``RestFiles`` (pytest's ``tmp_path``)."""
        return tmp_path

    @pytest.fixture
    def rest_files(self, rest_dir):
        """Return a ``RestFiles`` instance constructed with ``rest_dir``."""
        return lyrics.RestFiles(rest_dir)

    def test_write(self, rest_dir: Path, rest_files):
        """Write three items and check the generated files and their ordering."""
        rows = [
            ("Artist One", "Album One", "Song One", "Lyrics One"),
            ("Artist One", "Album One", "Song Two", "Lyrics Two"),
            ("Artist Two", "Album Two", "Song Three", "Lyrics Three"),
        ]
        items = [
            Item(albumartist=albumartist, album=album, title=title, lyrics=text)
            for albumartist, album, title, text in rows
        ]

        rest_files.write(items)

        for top_level_name in ("index.rst", "conf.py"):
            assert (rest_dir / top_level_name).exists()

        artists_dir = rest_dir / "artists"
        artist_one_file = artists_dir / "artist-one.rst"
        artist_two_file = artists_dir / "artist-two.rst"
        assert artist_one_file.exists()
        assert artist_two_file.exists()

        # Each name must first appear after the first appearance of the
        # previous one.
        content_one = artist_one_file.read_text()
        assert (
            content_one.index("Artist One")
            < content_one.index("Album One")
            < content_one.index("Song One")
            < content_one.index("Lyrics One")
            < content_one.index("Song Two")
            < content_one.index("Lyrics Two")
        )

        content_two = artist_two_file.read_text()
        assert (
            content_two.index("Artist Two")
            < content_two.index("Album Two")
            < content_two.index("Song Three")
            < content_two.index("Lyrics Three")
        )
|
||||
|
|
|
|||
Loading…
Reference in a new issue