Lyrics: Resurrect translations and refactor ReST and command line handling (#5485)

🎵 The Refactoring Blues 🎵 by [Claude](https://claude.ai)

Verse 1:
Got those lyrics plugin blues
Cleaning up some messy code
Moving classes, fixing views
Making changes down the road

Chorus:
We're refactoring tonight
Making the codebase clean and bright
Translation's got a brand new home
And ReST files found their own

Verse 2:
Added Microsoft Translate
Keeping tokens safe and sound
Config options up-to-date
Better structure all around

Bridge:
Path operations simplified
Groups of artists, neat and tied
Error handling's looking fine
Comments clear along each line

Verse 3:
RestFiles in their own class now
Cleaning imports, showing how
Better typing makes it clear
What should go and what stays here

Final Chorus:
We're refactoring tonight
Making the codebase clean and bright
Translation's got a brand new home
And our code can stand alone!

—  inspired by _the diff_

---

### Technical Changes

- Replaced deprecated and broken Bing translations by Microsoft
Translator API
  - Isolated all functionality in the `Translator` class.
  - Updated translation settings configuration.
  - Added support for synced lyrics from LRCLib.
- Added support for preserving existing translations to help users to
manage their characters quota.
  - Added error handling and logging
  - Added tests

- Created RestFiles class for ReST document handling
  - Simplified path operations using pathlib
  - Added tests

- Improved command line options handling

#### Caching of translations
The plugin will not re-translate lyrics if translations already exist,
see

```fish
$ beet -v lyrics albumartist::Sel karta -f
...
lyrics: LyricsPlugin: Fetching lyrics for Sel - Saulės Miestas
lyrics: LRCLib: Fetching JSON from https://lrclib.net/api/get
lyrics: LyricsPlugin: 🟢 Found lyrics: 32275 | 1996 / Neįvertinta Karta: Sel - Saulės Miestas
lyrics: Translator: Posting data to https://api.cognitive.microsofttranslator.com/translate
lyrics: Translator: 🟢 Translated lyrics to EN

$ beet -v lyrics albumartist::Sel karta -f
...
lyrics: LyricsPlugin: Fetching lyrics for Sel - Saulės Miestas
lyrics: LRCLib: Fetching JSON from https://lrclib.net/api/get
lyrics: LyricsPlugin: 🟢 Found lyrics: 32275 | 1996 / Neįvertinta Karta: Sel - Saulės Miestas
lyrics: Translator: 🔵 Translations already exist
```
This commit is contained in:
Šarūnas Nejus 2025-02-20 04:19:08 +00:00 committed by GitHub
commit fe28957358
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 530 additions and 247 deletions

View file

@ -113,3 +113,23 @@ class GoogleCustomSearchAPI:
"""Pagemap data with a single meta tags dict in a list."""
metatags: list[JSONDict]
class TranslatorAPI:
class Language(TypedDict):
"""Language data returned by the translator API."""
language: str
score: float
class Translation(TypedDict):
"""Translation data returned by the translator API."""
text: str
to: str
class Response(TypedDict):
"""Response from the translator API."""
detectedLanguage: TranslatorAPI.Language
translations: list[TranslatorAPI.Translation]

View file

@ -17,16 +17,17 @@
from __future__ import annotations
import atexit
import errno
import itertools
import math
import os.path
import re
import textwrap
from contextlib import contextmanager, suppress
from dataclasses import dataclass
from functools import cached_property, partial, total_ordering
from html import unescape
from http import HTTPStatus
from itertools import groupby
from pathlib import Path
from typing import TYPE_CHECKING, Iterable, Iterator, NamedTuple
from urllib.parse import quote, quote_plus, urlencode, urlparse
@ -40,49 +41,22 @@ from beets import plugins, ui
from beets.autotag.hooks import string_dist
if TYPE_CHECKING:
from beets.importer import ImportTask
from beets.library import Item
from logging import Logger
from ._typing import GeniusAPI, GoogleCustomSearchAPI, JSONDict, LRCLibAPI
from beets.importer import ImportTask
from beets.library import Item, Library
from ._typing import (
GeniusAPI,
GoogleCustomSearchAPI,
JSONDict,
LRCLibAPI,
TranslatorAPI,
)
USER_AGENT = f"beets/{beets.__version__}"
INSTRUMENTAL_LYRICS = "[Instrumental]"
# The content for the base index.rst generated in ReST mode.
REST_INDEX_TEMPLATE = """Lyrics
======
* :ref:`Song index <genindex>`
* :ref:`search`
Artist index:
.. toctree::
:maxdepth: 1
:glob:
artists/*
"""
# The content for the base conf.py generated.
REST_CONF_TEMPLATE = """# -*- coding: utf-8 -*-
master_doc = 'index'
project = 'Lyrics'
copyright = 'none'
author = 'Various Authors'
latex_documents = [
(master_doc, 'Lyrics.tex', project,
author, 'manual'),
]
epub_title = project
epub_author = author
epub_publisher = author
epub_copyright = copyright
epub_exclude_files = ['search.html']
epub_tocdepth = 1
epub_tocdup = False
"""
class NotFoundError(requests.exceptions.HTTPError):
pass
@ -252,6 +226,12 @@ class RequestHandler:
self.debug("Fetching JSON from {}", url)
return r_session.get(url, **kwargs).json()
def post_json(self, url: str, params: JSONDict | None = None, **kwargs):
"""Send POST request and return JSON response."""
url = self.format_url(url, params)
self.debug("Posting JSON to {}", url)
return r_session.post(url, **kwargs).json()
@contextmanager
def handle_request(self) -> Iterator[None]:
try:
@ -760,6 +740,214 @@ class Google(SearchBackend):
return None
@dataclass
class Translator(RequestHandler):
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
SEPARATOR = " | "
remove_translations = partial(re.compile(r" / [^\n]+").sub, "")
_log: Logger
api_key: str
to_language: str
from_languages: list[str]
@classmethod
def from_config(
cls,
log: Logger,
api_key: str,
to_language: str,
from_languages: list[str] | None = None,
) -> Translator:
return cls(
log,
api_key,
to_language.upper(),
[x.upper() for x in from_languages or []],
)
def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
"""Return translations for the given texts.
To reduce the translation 'cost', we translate unique texts, and then
map the translations back to the original texts.
"""
unique_texts = list(dict.fromkeys(texts))
text = self.SEPARATOR.join(unique_texts)
data: list[TranslatorAPI.Response] = self.post_json(
self.TRANSLATE_URL,
headers={"Ocp-Apim-Subscription-Key": self.api_key},
json=[{"text": text}],
params={"api-version": "3.0", "to": self.to_language},
)
translated_text = data[0]["translations"][0]["text"]
translations = translated_text.split(self.SEPARATOR)
trans_by_text = dict(zip(unique_texts, translations))
return list(zip(texts, (trans_by_text.get(t, "") for t in texts)))
@classmethod
def split_line(cls, line: str) -> tuple[str, str]:
"""Split line to (timestamp, text)."""
if m := cls.LINE_PARTS_RE.match(line):
return m[1], m[2]
return "", ""
def append_translations(self, lines: Iterable[str]) -> list[str]:
"""Append translations to the given lyrics texts.
Lines may contain timestamps from LRCLib which need to be temporarily
removed for the translation. They can take any of these forms:
- empty
Text - text only
[00:00:00] - timestamp only
[00:00:00] Text - timestamp with text
"""
# split into [(timestamp, text), ...]]
ts_and_text = list(map(self.split_line, lines))
timestamps = [ts for ts, _ in ts_and_text]
text_pairs = self.get_translations([ln for _, ln in ts_and_text])
# only add the separator for non-empty translations
texts = [" / ".join(filter(None, p)) for p in text_pairs]
# only add the space between non-empty timestamps and texts
return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]
def translate(self, new_lyrics: str, old_lyrics: str) -> str:
"""Translate the given lyrics to the target language.
Check old lyrics for existing translations and return them if their
original text matches the new lyrics. This is to avoid translating
the same lyrics multiple times.
If the lyrics are already in the target language or not in any of
of the source languages (if configured), they are returned as is.
The footer with the source URL is preserved, if present.
"""
if (
" / " in old_lyrics
and self.remove_translations(old_lyrics) == new_lyrics
):
self.info("🔵 Translations already exist")
return old_lyrics
lyrics_language = langdetect.detect(new_lyrics).upper()
if lyrics_language == self.to_language:
self.info(
"🔵 Lyrics are already in the target language {}",
self.to_language,
)
return new_lyrics
if self.from_languages and lyrics_language not in self.from_languages:
self.info(
"🔵 Configuration {} does not permit translating from {}",
self.from_languages,
lyrics_language,
)
return new_lyrics
lyrics, *url = new_lyrics.split("\n\nSource: ")
with self.handle_request():
translated_lines = self.append_translations(lyrics.splitlines())
self.info("🟢 Translated lyrics to {}", self.to_language)
return "\n\nSource: ".join(["\n".join(translated_lines), *url])
@dataclass
class RestFiles:
# The content for the base index.rst generated in ReST mode.
REST_INDEX_TEMPLATE = textwrap.dedent("""
Lyrics
======
* :ref:`Song index <genindex>`
* :ref:`search`
Artist index:
.. toctree::
:maxdepth: 1
:glob:
artists/*
""").strip()
# The content for the base conf.py generated.
REST_CONF_TEMPLATE = textwrap.dedent("""
master_doc = "index"
project = "Lyrics"
copyright = "none"
author = "Various Authors"
latex_documents = [
(master_doc, "Lyrics.tex", project, author, "manual"),
]
epub_exclude_files = ["search.html"]
epub_tocdepth = 1
epub_tocdup = False
""").strip()
directory: Path
@cached_property
def artists_dir(self) -> Path:
dir = self.directory / "artists"
dir.mkdir(parents=True, exist_ok=True)
return dir
def write_indexes(self) -> None:
"""Write conf.py and index.rst files necessary for Sphinx
We write minimal configurations that are necessary for Sphinx
to operate. We do not overwrite existing files so that
customizations are respected."""
index_file = self.directory / "index.rst"
if not index_file.exists():
index_file.write_text(self.REST_INDEX_TEMPLATE)
conf_file = self.directory / "conf.py"
if not conf_file.exists():
conf_file.write_text(self.REST_CONF_TEMPLATE)
def write_artist(self, artist: str, items: Iterable[Item]) -> None:
parts = [
f'{artist}\n{"=" * len(artist)}',
".. contents::\n :local:",
]
for album, items in groupby(items, key=lambda i: i.album):
parts.append(f'{album}\n{"-" * len(album)}')
parts.extend(
part
for i in items
if (title := f":index:`{i.title.strip()}`")
for part in (
f'{title}\n{"~" * len(title)}',
textwrap.indent(i.lyrics, "| "),
)
)
file = self.artists_dir / f"{slug(artist)}.rst"
file.write_text("\n\n".join(parts).strip())
def write(self, items: list[Item]) -> None:
self.directory.mkdir(exist_ok=True, parents=True)
self.write_indexes()
items.sort(key=lambda i: i.albumartist)
for artist, artist_items in groupby(items, key=lambda i: i.albumartist):
self.write_artist(artist.strip(), artist_items)
d = self.directory
text = f"""
ReST files generated. to build, use one of:
sphinx-build -b html {d} {d/"html"}
sphinx-build -b epub {d} {d/"epub"}
sphinx-build -b latex {d} {d/"latex"} && make -C {d/"latex"} all-pdf
"""
ui.print_(textwrap.dedent(text))
class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
BACKEND_BY_NAME = {
b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch]
@ -776,15 +964,23 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
return [self.BACKEND_BY_NAME[c](self.config, self._log) for c in chosen]
@cached_property
def translator(self) -> Translator | None:
config = self.config["translate"]
if config["api_key"].get() and config["to_language"].get():
return Translator.from_config(self._log, **config.flatten())
return None
def __init__(self):
super().__init__()
self.import_stages = [self.imported]
self.config.add(
{
"auto": True,
"bing_client_secret": None,
"bing_lang_from": [],
"bing_lang_to": None,
"translate": {
"api_key": None,
"from_languages": [],
"to_language": None,
},
"dist_thresh": 0.11,
"google_API_key": None,
"google_engine_ID": "009217259823014548361:lndtuqkycfu",
@ -795,6 +991,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
"fallback": None,
"force": False,
"local": False,
"print": False,
"synced": False,
# Musixmatch is disabled by default as they are currently blocking
# requests with the beets user agent.
@ -803,52 +1000,27 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
],
}
)
self.config["bing_client_secret"].redact = True
self.config["translate"]["api_key"].redact = True
self.config["google_API_key"].redact = True
self.config["google_engine_ID"].redact = True
self.config["genius_api_key"].redact = True
# State information for the ReST writer.
# First, the current artist we're writing.
self.artist = "Unknown artist"
# The current album: False means no album yet.
self.album = False
# The current rest file content. None means the file is not
# open yet.
self.rest = None
self.config["bing_lang_from"] = [
x.lower() for x in self.config["bing_lang_from"].as_str_seq()
]
@cached_property
def bing_access_token(self) -> str | None:
params = {
"client_id": "beets",
"client_secret": self.config["bing_client_secret"],
"scope": "https://api.microsofttranslator.com",
"grant_type": "client_credentials",
}
oauth_url = "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13"
with self.handle_request():
r = r_session.post(oauth_url, params=params)
return r.json()["access_token"]
if self.config["auto"]:
self.import_stages = [self.imported]
def commands(self):
cmd = ui.Subcommand("lyrics", help="fetch song lyrics")
cmd.parser.add_option(
"-p",
"--print",
dest="printlyr",
action="store_true",
default=False,
default=self.config["print"].get(),
help="print lyrics to console",
)
cmd.parser.add_option(
"-r",
"--write-rest",
dest="writerest",
dest="rest_directory",
action="store",
default=None,
metavar="dir",
@ -857,154 +1029,69 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
cmd.parser.add_option(
"-f",
"--force",
dest="force_refetch",
action="store_true",
default=False,
default=self.config["force"].get(),
help="always re-download lyrics",
)
cmd.parser.add_option(
"-l",
"--local",
dest="local_only",
action="store_true",
default=False,
default=self.config["local"].get(),
help="do not fetch missing lyrics",
)
def func(lib, opts, args):
def func(lib: Library, opts, args) -> None:
# The "write to files" option corresponds to the
# import_write config value.
write = ui.should_write()
if opts.writerest:
self.writerest_indexes(opts.writerest)
items = lib.items(ui.decargs(args))
self.config.set(vars(opts))
items = list(lib.items(args))
for item in items:
if not opts.local_only and not self.config["local"]:
self.fetch_item_lyrics(
item, write, opts.force_refetch or self.config["force"]
)
if item.lyrics:
if opts.printlyr:
ui.print_(item.lyrics)
if opts.writerest:
self.appendrest(opts.writerest, item)
if opts.writerest and items:
# flush last artist & write to ReST
self.writerest(opts.writerest)
ui.print_("ReST files generated. to build, use one of:")
ui.print_(
" sphinx-build -b html %s _build/html" % opts.writerest
)
ui.print_(
" sphinx-build -b epub %s _build/epub" % opts.writerest
)
ui.print_(
(
" sphinx-build -b latex %s _build/latex "
"&& make -C _build/latex all-pdf"
)
% opts.writerest
)
self.add_item_lyrics(item, ui.should_write())
if item.lyrics and opts.print:
ui.print_(item.lyrics)
if opts.rest_directory and (
items := [i for i in items if i.lyrics]
):
RestFiles(Path(opts.rest_directory)).write(items)
cmd.func = func
return [cmd]
def appendrest(self, directory, item):
"""Append the item to an ReST file
This will keep state (in the `rest` variable) in order to avoid
writing continuously to the same files.
"""
if slug(self.artist) != slug(item.albumartist):
# Write current file and start a new one ~ item.albumartist
self.writerest(directory)
self.artist = item.albumartist.strip()
self.rest = "%s\n%s\n\n.. contents::\n :local:\n\n" % (
self.artist,
"=" * len(self.artist),
)
if self.album != item.album:
tmpalbum = self.album = item.album.strip()
if self.album == "":
tmpalbum = "Unknown album"
self.rest += "{}\n{}\n\n".format(tmpalbum, "-" * len(tmpalbum))
title_str = ":index:`%s`" % item.title.strip()
block = "| " + item.lyrics.replace("\n", "\n| ")
self.rest += "{}\n{}\n\n{}\n\n".format(
title_str, "~" * len(title_str), block
)
def writerest(self, directory):
"""Write self.rest to a ReST file"""
if self.rest is not None and self.artist is not None:
path = os.path.join(
directory, "artists", slug(self.artist) + ".rst"
)
with open(path, "wb") as output:
output.write(self.rest.encode("utf-8"))
def writerest_indexes(self, directory):
"""Write conf.py and index.rst files necessary for Sphinx
We write minimal configurations that are necessary for Sphinx
to operate. We do not overwrite existing files so that
customizations are respected."""
try:
os.makedirs(os.path.join(directory, "artists"))
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise
indexfile = os.path.join(directory, "index.rst")
if not os.path.exists(indexfile):
with open(indexfile, "w") as output:
output.write(REST_INDEX_TEMPLATE)
conffile = os.path.join(directory, "conf.py")
if not os.path.exists(conffile):
with open(conffile, "w") as output:
output.write(REST_CONF_TEMPLATE)
def imported(self, _, task: ImportTask) -> None:
"""Import hook for fetching lyrics automatically."""
if self.config["auto"]:
for item in task.imported_items():
self.fetch_item_lyrics(item, False, self.config["force"])
for item in task.imported_items():
self.add_item_lyrics(item, False)
def fetch_item_lyrics(self, item: Item, write: bool, force: bool) -> None:
def find_lyrics(self, item: Item) -> str:
album, length = item.album, round(item.length)
matches = (
[
lyrics
for t in titles
if (lyrics := self.get_lyrics(a, t, album, length))
]
for a, titles in search_pairs(item)
)
return "\n\n---\n\n".join(next(filter(None, matches), []))
def add_item_lyrics(self, item: Item, write: bool) -> None:
"""Fetch and store lyrics for a single item. If ``write``, then the
lyrics will also be written to the file itself.
"""
# Skip if the item already has lyrics.
if not force and item.lyrics:
if self.config["local"]:
return
if not self.config["force"] and item.lyrics:
self.info("🔵 Lyrics already present: {}", item)
return
lyrics_matches = []
album, length = item.album, round(item.length)
for artist, titles in search_pairs(item):
lyrics_matches = [
self.get_lyrics(artist, title, album, length)
for title in titles
]
if any(lyrics_matches):
break
lyrics = "\n\n---\n\n".join(filter(None, lyrics_matches))
if lyrics:
if lyrics := self.find_lyrics(item):
self.info("🟢 Found lyrics: {0}", item)
if self.config["bing_client_secret"].get():
lang_from = langdetect.detect(lyrics)
if self.config["bing_lang_to"].get() != lang_from and (
not self.config["bing_lang_from"]
or (lang_from in self.config["bing_lang_from"].as_str_seq())
):
lyrics = self.append_translation(
lyrics, self.config["bing_lang_to"]
)
if translator := self.translator:
lyrics = translator.translate(lyrics, item.lyrics)
else:
self.info("🔴 Lyrics not found: {}", item)
lyrics = self.config["fallback"].get()
@ -1027,30 +1114,3 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
return f"{lyrics}\n\nSource: {url}"
return None
def append_translation(self, text, to_lang):
from xml.etree import ElementTree
if not (token := self.bing_access_token):
self.warn(
"Could not get Bing Translate API access token. "
"Check your 'bing_client_secret' password."
)
return text
# Extract unique lines to limit API request size per song
lines = text.split("\n")
unique_lines = set(lines)
url = "https://api.microsofttranslator.com/v2/Http.svc/Translate"
with self.handle_request():
text = self.fetch_text(
url,
headers={"Authorization": f"Bearer {token}"},
params={"text": "|".join(unique_lines), "to": to_lang},
)
if translated := ElementTree.fromstring(text.encode("utf-8")).text:
# Use a translation mapping dict to build resulting lyrics
translations = dict(zip(unique_lines, translated.split("|")))
return "".join(f"{ln} / {translations[ln]}\n" for ln in lines)
return text

View file

@ -19,6 +19,8 @@ New features:
control the maximum allowed distance between the lyrics search result and the
tagged item's artist and title. This is useful for preventing false positives
when fetching lyrics.
* :doc:`plugins/lyrics`: Rewrite lyrics translation functionality to use Azure
AI Translator API and add relevant instructions to the documentation.
Bug fixes:

View file

@ -38,26 +38,30 @@ Default configuration:
lyrics:
auto: yes
bing_client_secret: null
bing_lang_from: []
bing_lang_to: null
translate:
api_key:
from_languages: []
to_language:
dist_thresh: 0.11
fallback: null
force: no
google_API_key: null
google_engine_ID: 009217259823014548361:lndtuqkycfu
print: no
sources: [lrclib, google, genius, tekstowo]
synced: no
The available options are:
- **auto**: Fetch lyrics automatically during import.
- **bing_client_secret**: Your Bing Translation application password
(see :ref:`lyrics-translation`)
- **bing_lang_from**: By default all lyrics with a language other than
``bing_lang_to`` are translated. Use a list of lang codes to restrict the set
of source languages to translate.
- **bing_lang_to**: Language to translate lyrics into.
- **translate**:
- **api_key**: Api key to access your Azure Translator resource. (see
:ref:`lyrics-translation`)
- **from_languages**: By default all lyrics with a language other than
``translate_to`` are translated. Use a list of language codes to restrict
them.
- **to_language**: Language code to translate lyrics to.
- **dist_thresh**: The maximum distance between the artist and title
combination of the music file and lyrics candidate to consider them a match.
Lower values will make the plugin more strict, higher values will make it
@ -72,6 +76,7 @@ The available options are:
- **google_engine_ID**: The custom search engine to use.
Default: The `beets custom search engine`_, which gathers an updated list of
sources known to be scrapeable.
- **print**: Print lyrics to the console.
- **sources**: List of sources to search for lyrics. An asterisk ``*`` expands
to all available sources. The ``google`` source will be automatically
deactivated if no ``google_API_key`` is setup.
@ -104,9 +109,8 @@ Rendering Lyrics into Other Formats
-----------------------------------
The ``-r directory, --write-rest directory`` option renders all lyrics as
`reStructuredText`_ (ReST) documents in ``directory`` (by default, the current
directory). That directory, in turn, can be parsed by tools like `Sphinx`_ to
generate HTML, ePUB, or PDF documents.
`reStructuredText`_ (ReST) documents in ``directory``. That directory, in turn,
can be parsed by tools like `Sphinx`_ to generate HTML, ePUB, or PDF documents.
Minimal ``conf.py`` and ``index.rst`` files are created the first time the
command is run. They are not overwritten on subsequent runs, so you can safely
@ -119,19 +123,19 @@ Sphinx supports various `builders`_, see a few suggestions:
::
sphinx-build -b html . _build/html
sphinx-build -b html <dir> <dir>/html
.. admonition:: Build an ePUB3 formatted file, usable on ebook readers
::
sphinx-build -b epub3 . _build/epub
sphinx-build -b epub3 <dir> <dir>/epub
.. admonition:: Build a PDF file, which incidentally also builds a LaTeX file
::
sphinx-build -b latex %s _build/latex && make -C _build/latex all-pdf
sphinx-build -b latex <dir> <dir>/latex && make -C <dir>/latex all-pdf
.. _Sphinx: https://www.sphinx-doc.org/
@ -165,10 +169,28 @@ After that, the lyrics plugin will fall back on other declared data sources.
Activate On-the-Fly Translation
-------------------------------
You need to register for a Microsoft Azure Marketplace free account and
to the `Microsoft Translator API`_. Follow the four steps process, specifically
at step 3 enter ``beets`` as *Client ID* and copy/paste the generated
*Client secret* into your ``bing_client_secret`` configuration, alongside
``bing_lang_to`` target ``language code``.
We use Azure to optionally translate your lyrics. To set up the integration,
follow these steps:
.. _Microsoft Translator API: https://docs.microsoft.com/en-us/azure/cognitive-services/translator/translator-how-to-signup
1. `Create a Translator resource`_ on Azure.
2. `Obtain its API key`_.
3. Add the API key to your configuration as ``translate.api_key``.
4. Configure your target language using the ``translate.to_language`` option.
For example, with the following configuration
.. code-block:: yaml
lyrics:
translate:
api_key: YOUR_TRANSLATOR_API_KEY
to_language: de
You should expect lyrics like this::
Original verse / Ursprünglicher Vers
Some other verse / Ein anderer Vers
.. _create a Translator resource: https://learn.microsoft.com/en-us/azure/ai-services/translator/create-translator-resource
.. _obtain its API key: https://learn.microsoft.com/en-us/python/api/overview/azure/ai-translation-text-readme?view=azure-python&preserve-view=true#get-an-api-key

View file

@ -15,7 +15,9 @@ markers =
data_file = .reports/coverage/data
branch = true
relative_files = true
omit = beets/test/*
omit =
beets/test/*
beetsplug/_typing.py
[coverage:report]
precision = 2

View file

@ -17,13 +17,15 @@
import importlib.util
import os
import re
import textwrap
from functools import partial
from http import HTTPStatus
from pathlib import Path
import pytest
from beets.library import Item
from beets.test.helper import PluginMixin
from beets.test.helper import PluginMixin, TestHelper
from beetsplug import lyrics
from .lyrics_pages import LyricsPage, lyrics_pages
@ -40,6 +42,14 @@ PHRASE_BY_TITLE = {
}
@pytest.fixture(scope="module")
def helper():
helper = TestHelper()
helper.setup_beets()
yield helper
helper.teardown_beets()
class TestLyricsUtils:
@pytest.mark.parametrize(
"artist, title",
@ -238,6 +248,27 @@ class TestLyricsPlugin(LyricsPluginMixin):
assert last_log
assert re.search(expected_log_match, last_log, re.I)
@pytest.mark.parametrize(
"plugin_config, found, expected",
[
({}, "new", "old"),
({"force": True}, "new", "new"),
({"force": True, "local": True}, "new", "old"),
({"force": True, "fallback": None}, "", "old"),
({"force": True, "fallback": ""}, "", ""),
({"force": True, "fallback": "default"}, "", "default"),
],
)
def test_overwrite_config(
self, monkeypatch, helper, lyrics_plugin, found, expected
):
monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: found)
item = helper.create_item(id=1, lyrics="old")
lyrics_plugin.add_item_lyrics(item, False)
assert item.lyrics == expected
class LyricsBackendTest(LyricsPluginMixin):
@pytest.fixture
@ -287,8 +318,13 @@ class TestLyricsSources(LyricsBackendTest):
def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage):
"""Test parsed lyrics from each of the configured lyrics pages."""
lyrics_info = lyrics_plugin.get_lyrics(
lyrics_page.artist, lyrics_page.track_title, "", 186
lyrics_info = lyrics_plugin.find_lyrics(
Item(
artist=lyrics_page.artist,
title=lyrics_page.track_title,
album="",
length=186.0,
)
)
assert lyrics_info
@ -509,3 +545,144 @@ class TestLRCLibLyrics(LyricsBackendTest):
lyrics, _ = fetch_lyrics()
assert lyrics == expected_lyrics
class TestTranslation:
@pytest.fixture(autouse=True)
def _patch_bing(self, requests_mock):
def callback(request, _):
if b"Refrain" in request.body:
translations = (
""
" | [Refrain : Doja Cat]"
" | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501
" | Mon corps ne me laissait pas le cacher (Cachez-le)"
" | Quoi quil arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501
" | Chevauchant à travers le tonnerre, la foudre"
)
elif b"00:00.00" in request.body:
translations = (
""
" | [00:00.00] Quelques paroles synchronisées"
" | [00:01.00] Quelques paroles plus synchronisées"
)
else:
translations = (
""
" | Quelques paroles synchronisées"
" | Quelques paroles plus synchronisées"
)
return [
{
"detectedLanguage": {"language": "en", "score": 1.0},
"translations": [{"text": translations, "to": "fr"}],
}
]
requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback)
@pytest.mark.parametrize(
"new_lyrics, old_lyrics, expected",
[
pytest.param(
"""
[Refrain: Doja Cat]
Hard for me to let you go (Let you go, let you go)
My body wouldn't let me hide it (Hide it)
No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold)
Ridin' through the thunder, lightnin'""",
"",
"""
[Refrain: Doja Cat] / [Refrain : Doja Cat]
Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)
My body wouldn't let me hide it (Hide it) / Mon corps ne me laissait pas le cacher (Cachez-le)
No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) / Quoi quil arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)
Ridin' through the thunder, lightnin' / Chevauchant à travers le tonnerre, la foudre""", # noqa: E501
id="plain",
),
pytest.param(
"""
[00:00.00] Some synced lyrics
[00:00:50]
[00:01.00] Some more synced lyrics
Source: https://lrclib.net/api/123""",
"",
"""
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
[00:00:50]
[00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées
Source: https://lrclib.net/api/123""", # noqa: E501
id="synced",
),
pytest.param(
"Quelques paroles",
"",
"Quelques paroles",
id="already in the target language",
),
pytest.param(
"Some lyrics",
"Some lyrics / Some translation",
"Some lyrics / Some translation",
id="already translated",
),
],
)
def test_translate(self, new_lyrics, old_lyrics, expected):
plugin = lyrics.LyricsPlugin()
bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"])
assert bing.translate(
textwrap.dedent(new_lyrics), old_lyrics
) == textwrap.dedent(expected)
class TestRestFiles:
@pytest.fixture
def rest_dir(self, tmp_path):
return tmp_path
@pytest.fixture
def rest_files(self, rest_dir):
return lyrics.RestFiles(rest_dir)
def test_write(self, rest_dir: Path, rest_files):
items = [
Item(albumartist=aa, album=a, title=t, lyrics=lyr)
for aa, a, t, lyr in [
("Artist One", "Album One", "Song One", "Lyrics One"),
("Artist One", "Album One", "Song Two", "Lyrics Two"),
("Artist Two", "Album Two", "Song Three", "Lyrics Three"),
]
]
rest_files.write(items)
assert (rest_dir / "index.rst").exists()
assert (rest_dir / "conf.py").exists()
artist_one_file = rest_dir / "artists" / "artist-one.rst"
artist_two_file = rest_dir / "artists" / "artist-two.rst"
assert artist_one_file.exists()
assert artist_two_file.exists()
c = artist_one_file.read_text()
assert (
c.index("Artist One")
< c.index("Album One")
< c.index("Song One")
< c.index("Lyrics One")
< c.index("Song Two")
< c.index("Lyrics Two")
)
c = artist_two_file.read_text()
assert (
c.index("Artist Two")
< c.index("Album Two")
< c.index("Song Three")
< c.index("Lyrics Three")
)