From d7201062a8f9e323e2efe36c5365adaa40ab9e55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 13 Oct 2024 23:42:55 +0100 Subject: [PATCH 1/6] Resurrect translation functionality --- beetsplug/_typing.py | 20 ++++ beetsplug/lyrics.py | 183 ++++++++++++++++++++++++------------ docs/changelog.rst | 2 + docs/plugins/lyrics.rst | 51 +++++++--- test/plugins/test_lyrics.py | 85 +++++++++++++++++ 5 files changed, 268 insertions(+), 73 deletions(-) diff --git a/beetsplug/_typing.py b/beetsplug/_typing.py index 915ea77e8..b772ffdd5 100644 --- a/beetsplug/_typing.py +++ b/beetsplug/_typing.py @@ -113,3 +113,23 @@ class GoogleCustomSearchAPI: """Pagemap data with a single meta tags dict in a list.""" metatags: list[JSONDict] + + +class TranslatorAPI: + class Language(TypedDict): + """Language data returned by the translator API.""" + + language: str + score: float + + class Translation(TypedDict): + """Translation data returned by the translator API.""" + + text: str + to: str + + class Response(TypedDict): + """Response from the translator API.""" + + detectedLanguage: TranslatorAPI.Language + translations: list[TranslatorAPI.Translation] diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 1732edbf7..9b7f39e5b 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -40,10 +40,18 @@ from beets import plugins, ui from beets.autotag.hooks import string_dist if TYPE_CHECKING: + from logging import Logger + from beets.importer import ImportTask from beets.library import Item - from ._typing import GeniusAPI, GoogleCustomSearchAPI, JSONDict, LRCLibAPI + from ._typing import ( + GeniusAPI, + GoogleCustomSearchAPI, + JSONDict, + LRCLibAPI, + TranslatorAPI, + ) USER_AGENT = f"beets/{beets.__version__}" INSTRUMENTAL_LYRICS = "[Instrumental]" @@ -252,6 +260,12 @@ class RequestHandler: self.debug("Fetching JSON from {}", url) return r_session.get(url, **kwargs).json() + def post_json(self, url: str, params: JSONDict | None = None, **kwargs): + """Send POST request and return JSON response.""" + url = self.format_url(url, params) + self.debug("Posting JSON to {}", url) + return r_session.post(url, **kwargs).json() + @contextmanager def handle_request(self) -> Iterator[None]: try: @@ -760,6 +774,97 @@ class Google(SearchBackend): return None +@dataclass +class Translator(RequestHandler): + TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" + LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + + _log: Logger + api_key: str + to_language: str + from_languages: list[str] + + @classmethod + def from_config( + cls, + log: Logger, + api_key: str, + to_language: str, + from_languages: list[str] | None = None, + ) -> Translator: + return cls( + log, + api_key, + to_language.upper(), + [x.upper() for x in from_languages or []], + ) + + def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]: + """Return translations for the given texts. + + To reduce the translation 'cost', we translate unique texts, and then + map the translations back to the original texts. + """ + unique_texts = list(dict.fromkeys(texts)) + data: list[TranslatorAPI.Response] = self.post_json( + self.TRANSLATE_URL, + headers={"Ocp-Apim-Subscription-Key": self.api_key}, + json=[{"text": "|".join(unique_texts)}], + params={"api-version": "3.0", "to": self.to_language}, + ) + + translations = data[0]["translations"][0]["text"].split("|") + trans_by_text = dict(zip(unique_texts, translations)) + return list(zip(texts, (trans_by_text.get(t, "") for t in texts))) + + @classmethod + def split_line(cls, line: str) -> tuple[str, str]: + """Split line to (timestamp, text).""" + if m := cls.LINE_PARTS_RE.match(line): + return m[1], m[2] + + return "", "" + + def append_translations(self, lines: Iterable[str]) -> list[str]: + """Append translations to the given lyrics texts. + + Lines may contain timestamps from LRCLib which need to be temporarily + removed for the translation. They can take any of these forms: + - empty + Text - text only + [00:00:00] - timestamp only + [00:00:00] Text - timestamp with text + """ + # split into [(timestamp, text), ...]] + ts_and_text = list(map(self.split_line, lines)) + timestamps = [ts for ts, _ in ts_and_text] + text_pairs = self.get_translations([ln for _, ln in ts_and_text]) + + # only add the separator for non-empty translations + texts = [" / ".join(filter(None, p)) for p in text_pairs] + # only add the space between non-empty timestamps and texts + return [" ".join(filter(None, p)) for p in zip(timestamps, texts)] + + def translate(self, lyrics: str) -> str: + """Translate the given lyrics to the target language. + + If the lyrics are already in the target language or not in any of + of the source languages (if configured), they are returned as is. + + The footer with the source URL is preserved, if present. + """ + lyrics_language = langdetect.detect(lyrics).upper() + if lyrics_language == self.to_language or ( + self.from_languages and lyrics_language not in self.from_languages + ): + return lyrics + + lyrics, *url = lyrics.split("\n\nSource: ") + with self.handle_request(): + translated_lines = self.append_translations(lyrics.splitlines()) + return "\n\nSource: ".join(["\n".join(translated_lines), *url]) + + class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): BACKEND_BY_NAME = { b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch] @@ -776,15 +881,24 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): return [self.BACKEND_BY_NAME[c](self.config, self._log) for c in chosen] + @cached_property + def translator(self) -> Translator | None: + config = self.config["translate"] + if config["api_key"].get() and config["to_language"].get(): + return Translator.from_config(self._log, **config.flatten()) + return None + def __init__(self): super().__init__() self.import_stages = [self.imported] self.config.add( { "auto": True, - "bing_client_secret": None, - "bing_lang_from": [], - "bing_lang_to": None, + "translate": { + "api_key": None, + "from_languages": [], + "to_language": None, + }, "dist_thresh": 0.11, "google_API_key": None, "google_engine_ID": "009217259823014548361:lndtuqkycfu", @@ -803,7 +917,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): ], } ) - self.config["bing_client_secret"].redact = True + self.config["translate"]["api_key"].redact = True self.config["google_API_key"].redact = True self.config["google_engine_ID"].redact = True self.config["genius_api_key"].redact = True @@ -817,24 +931,6 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): # open yet. self.rest = None - self.config["bing_lang_from"] = [ - x.lower() for x in self.config["bing_lang_from"].as_str_seq() - ] - - @cached_property - def bing_access_token(self) -> str | None: - params = { - "client_id": "beets", - "client_secret": self.config["bing_client_secret"], - "scope": "https://api.microsofttranslator.com", - "grant_type": "client_credentials", - } - - oauth_url = "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13" - with self.handle_request(): - r = r_session.post(oauth_url, params=params) - return r.json()["access_token"] - def commands(self): cmd = ui.Subcommand("lyrics", help="fetch song lyrics") cmd.parser.add_option( @@ -996,14 +1092,12 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): if lyrics: self.info("🟢 Found lyrics: {0}", item) - if self.config["bing_client_secret"].get(): - lang_from = langdetect.detect(lyrics) - if self.config["bing_lang_to"].get() != lang_from and ( - not self.config["bing_lang_from"] - or (lang_from in self.config["bing_lang_from"].as_str_seq()) - ): - lyrics = self.append_translation( - lyrics, self.config["bing_lang_to"] + if translator := self.translator: + initial_lyrics = lyrics + if (lyrics := translator.translate(lyrics)) != initial_lyrics: + self.info( + "🟢 Added translation to {}", + self.config["translate_to"].get().upper(), ) else: self.info("🔴 Lyrics not found: {}", item) @@ -1027,30 +1121,3 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): return f"{lyrics}\n\nSource: {url}" return None - - def append_translation(self, text, to_lang): - from xml.etree import ElementTree - - if not (token := self.bing_access_token): - self.warn( - "Could not get Bing Translate API access token. " - "Check your 'bing_client_secret' password." - ) - return text - - # Extract unique lines to limit API request size per song - lines = text.split("\n") - unique_lines = set(lines) - url = "https://api.microsofttranslator.com/v2/Http.svc/Translate" - with self.handle_request(): - text = self.fetch_text( - url, - headers={"Authorization": f"Bearer {token}"}, - params={"text": "|".join(unique_lines), "to": to_lang}, - ) - if translated := ElementTree.fromstring(text.encode("utf-8")).text: - # Use a translation mapping dict to build resulting lyrics - translations = dict(zip(unique_lines, translated.split("|"))) - return "".join(f"{ln} / {translations[ln]}\n" for ln in lines) - - return text diff --git a/docs/changelog.rst b/docs/changelog.rst index ecf1c01d3..43cdd1255 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -19,6 +19,8 @@ New features: control the maximum allowed distance between the lyrics search result and the tagged item's artist and title. This is useful for preventing false positives when fetching lyrics. +* :doc:`plugins/lyrics`: Rewrite lyrics translation functionality to use Azure + AI Translator API and add relevant instructions to the documentation. Bug fixes: diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index f034cf47a..3ef7ab89b 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -38,9 +38,10 @@ Default configuration: lyrics: auto: yes - bing_client_secret: null - bing_lang_from: [] - bing_lang_to: null + translate: + api_key: + from_languages: [] + to_language: dist_thresh: 0.11 fallback: null force: no @@ -52,12 +53,14 @@ Default configuration: The available options are: - **auto**: Fetch lyrics automatically during import. -- **bing_client_secret**: Your Bing Translation application password - (see :ref:`lyrics-translation`) -- **bing_lang_from**: By default all lyrics with a language other than - ``bing_lang_to`` are translated. Use a list of lang codes to restrict the set - of source languages to translate. -- **bing_lang_to**: Language to translate lyrics into. +- **translate**: + + - **api_key**: Api key to access your Azure Translator resource. (see + :ref:`lyrics-translation`) + - **from_languages**: By default all lyrics with a language other than + ``translate_to`` are translated. Use a list of language codes to restrict + them. + - **to_language**: Language code to translate lyrics to. - **dist_thresh**: The maximum distance between the artist and title combination of the music file and lyrics candidate to consider them a match. Lower values will make the plugin more strict, higher values will make it @@ -165,10 +168,28 @@ After that, the lyrics plugin will fall back on other declared data sources. Activate On-the-Fly Translation ------------------------------- -You need to register for a Microsoft Azure Marketplace free account and -to the `Microsoft Translator API`_. Follow the four steps process, specifically -at step 3 enter ``beets`` as *Client ID* and copy/paste the generated -*Client secret* into your ``bing_client_secret`` configuration, alongside -``bing_lang_to`` target ``language code``. +We use Azure to optionally translate your lyrics. To set up the integration, +follow these steps: -.. _Microsoft Translator API: https://docs.microsoft.com/en-us/azure/cognitive-services/translator/translator-how-to-signup +1. `Create a Translator resource`_ on Azure. +2. `Obtain its API key`_. +3. Add the API key to your configuration as ``translate.api_key``. +4. Configure your target language using the ``translate.to_language`` option. + + +For example, with the following configuration + +.. code-block:: yaml + + lyrics: + translate: + api_key: YOUR_TRANSLATOR_API_KEY + to_language: de + +You should expect lyrics like this:: + + Original verse / Ursprünglicher Vers + Some other verse / Ein anderer Vers + +.. _create a Translator resource: https://learn.microsoft.com/en-us/azure/ai-services/translator/create-translator-resource +.. _obtain its API key: https://learn.microsoft.com/en-us/python/api/overview/azure/ai-translation-text-readme?view=azure-python&preserve-view=true#get-an-api-key diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index a3c640109..a1591aa24 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -17,6 +17,7 @@ import importlib.util import os import re +import textwrap from functools import partial from http import HTTPStatus @@ -509,3 +510,87 @@ class TestLRCLibLyrics(LyricsBackendTest): lyrics, _ = fetch_lyrics() assert lyrics == expected_lyrics + + +class TestTranslation: + @pytest.fixture(autouse=True) + def _patch_bing(self, requests_mock): + def callback(request, _): + if b"Refrain" in request.body: + translations = ( + "" + "|[Refrain : Doja Cat]" + "|Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 + "|Mon corps ne me laissait pas le cacher (Cachez-le)" + "|Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 + "|Chevauchant à travers le tonnerre, la foudre" + ) + elif b"00:00.00" in request.body: + translations = ( + "" + "|[00:00.00] Quelques paroles synchronisées" + "|[00:01.00] Quelques paroles plus synchronisées" + ) + else: + translations = ( + "" + "|Quelques paroles synchronisées" + "|Quelques paroles plus synchronisées" + ) + + return [ + { + "detectedLanguage": {"language": "en", "score": 1.0}, + "translations": [{"text": translations, "to": "fr"}], + } + ] + + requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback) + + @pytest.mark.parametrize( + "initial_lyrics, expected", + [ + pytest.param( + """ + [Refrain: Doja Cat] + Hard for me to let you go (Let you go, let you go) + My body wouldn't let me hide it (Hide it) + No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) + Ridin' through the thunder, lightnin'""", + """ + [Refrain: Doja Cat] / [Refrain : Doja Cat] + Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) + My body wouldn't let me hide it (Hide it) / Mon corps ne me laissait pas le cacher (Cachez-le) + No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) / Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas) + Ridin' through the thunder, lightnin' / Chevauchant à travers le tonnerre, la foudre""", # noqa: E501 + id="plain", + ), + pytest.param( + """ + [00:00.00] Some synced lyrics + [00:00:50] + [00:01.00] Some more synced lyrics + + Source: https://lrclib.net/api/123""", + """ + [00:00.00] Some synced lyrics / Quelques paroles synchronisées + [00:00:50] + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées + + Source: https://lrclib.net/api/123""", # noqa: E501 + id="synced", + ), + pytest.param( + "Quelques paroles", + "Quelques paroles", + id="already in the target language", + ), + ], + ) + def test_translate(self, initial_lyrics, expected): + plugin = lyrics.LyricsPlugin() + bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"]) + + assert bing.translate( + textwrap.dedent(initial_lyrics) + ) == textwrap.dedent(expected) From c95156adcd656a1ac1a3c34428b2978e0cc55d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 14 Oct 2024 00:04:50 +0100 Subject: [PATCH 2/6] Refactor writing rest files --- beetsplug/lyrics.py | 233 ++++++++++++++++-------------------- docs/plugins/lyrics.rst | 11 +- test/plugins/test_lyrics.py | 49 ++++++++ 3 files changed, 158 insertions(+), 135 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 9b7f39e5b..7e09cc0fe 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -17,16 +17,17 @@ from __future__ import annotations import atexit -import errno import itertools import math -import os.path import re +import textwrap from contextlib import contextmanager, suppress from dataclasses import dataclass from functools import cached_property, partial, total_ordering from html import unescape from http import HTTPStatus +from itertools import groupby +from pathlib import Path from typing import TYPE_CHECKING, Iterable, Iterator, NamedTuple from urllib.parse import quote, quote_plus, urlencode, urlparse @@ -56,41 +57,6 @@ if TYPE_CHECKING: USER_AGENT = f"beets/{beets.__version__}" INSTRUMENTAL_LYRICS = "[Instrumental]" -# The content for the base index.rst generated in ReST mode. -REST_INDEX_TEMPLATE = """Lyrics -====== - -* :ref:`Song index ` -* :ref:`search` - -Artist index: - -.. toctree:: - :maxdepth: 1 - :glob: - - artists/* -""" - -# The content for the base conf.py generated. -REST_CONF_TEMPLATE = """# -*- coding: utf-8 -*- -master_doc = 'index' -project = 'Lyrics' -copyright = 'none' -author = 'Various Authors' -latex_documents = [ - (master_doc, 'Lyrics.tex', project, - author, 'manual'), -] -epub_title = project -epub_author = author -epub_publisher = author -epub_copyright = copyright -epub_exclude_files = ['search.html'] -epub_tocdepth = 1 -epub_tocdup = False -""" - class NotFoundError(requests.exceptions.HTTPError): pass @@ -865,6 +831,97 @@ class Translator(RequestHandler): return "\n\nSource: ".join(["\n".join(translated_lines), *url]) +@dataclass +class RestFiles: + # The content for the base index.rst generated in ReST mode. + REST_INDEX_TEMPLATE = textwrap.dedent(""" + Lyrics + ====== + + * :ref:`Song index ` + * :ref:`search` + + Artist index: + + .. toctree:: + :maxdepth: 1 + :glob: + + artists/* + """).strip() + + # The content for the base conf.py generated. + REST_CONF_TEMPLATE = textwrap.dedent(""" + master_doc = "index" + project = "Lyrics" + copyright = "none" + author = "Various Authors" + latex_documents = [ + (master_doc, "Lyrics.tex", project, author, "manual"), + ] + epub_exclude_files = ["search.html"] + epub_tocdepth = 1 + epub_tocdup = False + """).strip() + + directory: Path + + @cached_property + def artists_dir(self) -> Path: + dir = self.directory / "artists" + dir.mkdir(parents=True, exist_ok=True) + return dir + + def write_indexes(self) -> None: + """Write conf.py and index.rst files necessary for Sphinx + + We write minimal configurations that are necessary for Sphinx + to operate. We do not overwrite existing files so that + customizations are respected.""" + index_file = self.directory / "index.rst" + if not index_file.exists(): + index_file.write_text(self.REST_INDEX_TEMPLATE) + conf_file = self.directory / "conf.py" + if not conf_file.exists(): + conf_file.write_text(self.REST_CONF_TEMPLATE) + + def write_artist(self, artist: str, items: Iterable[Item]) -> None: + parts = [ + f'{artist}\n{"=" * len(artist)}', + ".. contents::\n :local:", + ] + for album, items in groupby(items, key=lambda i: i.album): + parts.append(f'{album}\n{"-" * len(album)}') + parts.extend( + part + for i in items + if (title := f":index:`{i.title.strip()}`") + for part in ( + f'{title}\n{"~" * len(title)}', + textwrap.indent(i.lyrics, "| "), + ) + ) + file = self.artists_dir / f"{slug(artist)}.rst" + file.write_text("\n\n".join(parts).strip()) + + def write(self, items: list[Item]) -> None: + self.directory.mkdir(exist_ok=True, parents=True) + self.write_indexes() + + items.sort(key=lambda i: i.albumartist) + for artist, artist_items in groupby(items, key=lambda i: i.albumartist): + self.write_artist(artist.strip(), artist_items) + + d = self.directory + text = f""" + ReST files generated. to build, use one of: + sphinx-build -b html {d} {d/"html"} + sphinx-build -b epub {d} {d/"epub"} + sphinx-build -b latex {d} {d/"latex"} && make -C {d/"latex"} all-pdf + """ + ui.print_(textwrap.dedent(text)) + + class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): BACKEND_BY_NAME = { b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch] @@ -922,15 +979,6 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): self.config["google_engine_ID"].redact = True self.config["genius_api_key"].redact = True - # State information for the ReST writer. - # First, the current artist we're writing. - self.artist = "Unknown artist" - # The current album: False means no album yet. - self.album = False - # The current rest file content. None means the file is not - # open yet. - self.rest = None - def commands(self): cmd = ui.Subcommand("lyrics", help="fetch song lyrics") cmd.parser.add_option( @@ -944,7 +992,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): cmd.parser.add_option( "-r", "--write-rest", - dest="writerest", + dest="rest_directory", action="store", default=None, metavar="dir", @@ -970,99 +1018,26 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): def func(lib, opts, args): # The "write to files" option corresponds to the # import_write config value. - write = ui.should_write() - if opts.writerest: - self.writerest_indexes(opts.writerest) - items = lib.items(ui.decargs(args)) + items = list(lib.items(ui.decargs(args))) for item in items: if not opts.local_only and not self.config["local"]: self.fetch_item_lyrics( - item, write, opts.force_refetch or self.config["force"] + item, + ui.should_write(), + opts.force_refetch or self.config["force"], ) if item.lyrics: if opts.printlyr: ui.print_(item.lyrics) - if opts.writerest: - self.appendrest(opts.writerest, item) - if opts.writerest and items: - # flush last artist & write to ReST - self.writerest(opts.writerest) - ui.print_("ReST files generated. to build, use one of:") - ui.print_( - " sphinx-build -b html %s _build/html" % opts.writerest - ) - ui.print_( - " sphinx-build -b epub %s _build/epub" % opts.writerest - ) - ui.print_( - ( - " sphinx-build -b latex %s _build/latex " - "&& make -C _build/latex all-pdf" - ) - % opts.writerest - ) + + if opts.rest_directory and ( + items := [i for i in items if i.lyrics] + ): + RestFiles(Path(opts.rest_directory)).write(items) cmd.func = func return [cmd] - def appendrest(self, directory, item): - """Append the item to an ReST file - - This will keep state (in the `rest` variable) in order to avoid - writing continuously to the same files. - """ - - if slug(self.artist) != slug(item.albumartist): - # Write current file and start a new one ~ item.albumartist - self.writerest(directory) - self.artist = item.albumartist.strip() - self.rest = "%s\n%s\n\n.. contents::\n :local:\n\n" % ( - self.artist, - "=" * len(self.artist), - ) - - if self.album != item.album: - tmpalbum = self.album = item.album.strip() - if self.album == "": - tmpalbum = "Unknown album" - self.rest += "{}\n{}\n\n".format(tmpalbum, "-" * len(tmpalbum)) - title_str = ":index:`%s`" % item.title.strip() - block = "| " + item.lyrics.replace("\n", "\n| ") - self.rest += "{}\n{}\n\n{}\n\n".format( - title_str, "~" * len(title_str), block - ) - - def writerest(self, directory): - """Write self.rest to a ReST file""" - if self.rest is not None and self.artist is not None: - path = os.path.join( - directory, "artists", slug(self.artist) + ".rst" - ) - with open(path, "wb") as output: - output.write(self.rest.encode("utf-8")) - - def writerest_indexes(self, directory): - """Write conf.py and index.rst files necessary for Sphinx - - We write minimal configurations that are necessary for Sphinx - to operate. We do not overwrite existing files so that - customizations are respected.""" - try: - os.makedirs(os.path.join(directory, "artists")) - except OSError as e: - if e.errno == errno.EEXIST: - pass - else: - raise - indexfile = os.path.join(directory, "index.rst") - if not os.path.exists(indexfile): - with open(indexfile, "w") as output: - output.write(REST_INDEX_TEMPLATE) - conffile = os.path.join(directory, "conf.py") - if not os.path.exists(conffile): - with open(conffile, "w") as output: - output.write(REST_CONF_TEMPLATE) - def imported(self, _, task: ImportTask) -> None: """Import hook for fetching lyrics automatically.""" if self.config["auto"]: diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 3ef7ab89b..15ddea450 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -107,9 +107,8 @@ Rendering Lyrics into Other Formats ----------------------------------- The ``-r directory, --write-rest directory`` option renders all lyrics as -`reStructuredText`_ (ReST) documents in ``directory`` (by default, the current -directory). That directory, in turn, can be parsed by tools like `Sphinx`_ to -generate HTML, ePUB, or PDF documents. +`reStructuredText`_ (ReST) documents in ``directory``. That directory, in turn, +can be parsed by tools like `Sphinx`_ to generate HTML, ePUB, or PDF documents. Minimal ``conf.py`` and ``index.rst`` files are created the first time the command is run. They are not overwritten on subsequent runs, so you can safely @@ -122,19 +121,19 @@ Sphinx supports various `builders`_, see a few suggestions: :: - sphinx-build -b html . _build/html + sphinx-build -b html /html .. admonition:: Build an ePUB3 formatted file, usable on ebook readers :: - sphinx-build -b epub3 . _build/epub + sphinx-build -b epub3 /epub .. admonition:: Build a PDF file, which incidentally also builds a LaTeX file :: - sphinx-build -b latex %s _build/latex && make -C _build/latex all-pdf + sphinx-build -b latex /latex && make -C /latex all-pdf .. _Sphinx: https://www.sphinx-doc.org/ diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index a1591aa24..39d088860 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -20,6 +20,7 @@ import re import textwrap from functools import partial from http import HTTPStatus +from pathlib import Path import pytest @@ -594,3 +595,51 @@ class TestTranslation: assert bing.translate( textwrap.dedent(initial_lyrics) ) == textwrap.dedent(expected) + + +class TestRestFiles: + @pytest.fixture + def rest_dir(self, tmp_path): + return tmp_path + + @pytest.fixture + def rest_files(self, rest_dir): + return lyrics.RestFiles(rest_dir) + + def test_write(self, rest_dir: Path, rest_files): + items = [ + Item(albumartist=aa, album=a, title=t, lyrics=lyr) + for aa, a, t, lyr in [ + ("Artist One", "Album One", "Song One", "Lyrics One"), + ("Artist One", "Album One", "Song Two", "Lyrics Two"), + ("Artist Two", "Album Two", "Song Three", "Lyrics Three"), + ] + ] + + rest_files.write(items) + + assert (rest_dir / "index.rst").exists() + assert (rest_dir / "conf.py").exists() + + artist_one_file = rest_dir / "artists" / "artist-one.rst" + artist_two_file = rest_dir / "artists" / "artist-two.rst" + assert artist_one_file.exists() + assert artist_two_file.exists() + + c = artist_one_file.read_text() + assert ( + c.index("Artist One") + < c.index("Album One") + < c.index("Song One") + < c.index("Lyrics One") + < c.index("Song Two") + < c.index("Lyrics Two") + ) + + c = artist_two_file.read_text() + assert ( + c.index("Artist Two") + < c.index("Album Two") + < c.index("Song Three") + < c.index("Lyrics Three") + ) From 7893766e4cc02fe5965cfacb07f44400ec9c478b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Wed, 23 Oct 2024 13:06:21 +0100 Subject: [PATCH 3/6] Improve flags structure and add tests --- beetsplug/lyrics.py | 73 ++++++++++++++++++------------------- docs/plugins/lyrics.rst | 2 + test/plugins/test_lyrics.py | 40 ++++++++++++++++++-- 3 files changed, 74 insertions(+), 41 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 7e09cc0fe..21a164b8d 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -44,7 +44,7 @@ if TYPE_CHECKING: from logging import Logger from beets.importer import ImportTask - from beets.library import Item + from beets.library import Item, Library from ._typing import ( GeniusAPI, @@ -947,7 +947,6 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): def __init__(self): super().__init__() - self.import_stages = [self.imported] self.config.add( { "auto": True, @@ -966,6 +965,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): "fallback": None, "force": False, "local": False, + "print": False, "synced": False, # Musixmatch is disabled by default as they are currently blocking # requests with the beets user agent. @@ -979,14 +979,16 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): self.config["google_engine_ID"].redact = True self.config["genius_api_key"].redact = True + if self.config["auto"]: + self.import_stages = [self.imported] + def commands(self): cmd = ui.Subcommand("lyrics", help="fetch song lyrics") cmd.parser.add_option( "-p", "--print", - dest="printlyr", action="store_true", - default=False, + default=self.config["print"].get(), help="print lyrics to console", ) cmd.parser.add_option( @@ -1001,34 +1003,27 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): cmd.parser.add_option( "-f", "--force", - dest="force_refetch", action="store_true", - default=False, + default=self.config["force"].get(), help="always re-download lyrics", ) cmd.parser.add_option( "-l", "--local", - dest="local_only", action="store_true", - default=False, + default=self.config["local"].get(), help="do not fetch missing lyrics", ) - def func(lib, opts, args): + def func(lib: Library, opts, args) -> None: # The "write to files" option corresponds to the # import_write config value. - items = list(lib.items(ui.decargs(args))) + self.config.set(vars(opts)) + items = list(lib.items(args)) for item in items: - if not opts.local_only and not self.config["local"]: - self.fetch_item_lyrics( - item, - ui.should_write(), - opts.force_refetch or self.config["force"], - ) - if item.lyrics: - if opts.printlyr: - ui.print_(item.lyrics) + self.add_item_lyrics(item, ui.should_write()) + if item.lyrics and opts.print: + ui.print_(item.lyrics) if opts.rest_directory and ( items := [i for i in items if i.lyrics] @@ -1040,32 +1035,34 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): def imported(self, _, task: ImportTask) -> None: """Import hook for fetching lyrics automatically.""" - if self.config["auto"]: - for item in task.imported_items(): - self.fetch_item_lyrics(item, False, self.config["force"]) + for item in task.imported_items(): + self.add_item_lyrics(item, False) - def fetch_item_lyrics(self, item: Item, write: bool, force: bool) -> None: + def find_lyrics(self, item: Item) -> str: + album, length = item.album, round(item.length) + matches = ( + [ + lyrics + for t in titles + if (lyrics := self.get_lyrics(a, t, album, length)) + ] + for a, titles in search_pairs(item) + ) + + return "\n\n---\n\n".join(next(filter(None, matches), [])) + + def add_item_lyrics(self, item: Item, write: bool) -> None: """Fetch and store lyrics for a single item. If ``write``, then the lyrics will also be written to the file itself. """ - # Skip if the item already has lyrics. - if not force and item.lyrics: + if self.config["local"]: + return + + if not self.config["force"] and item.lyrics: self.info("🔵 Lyrics already present: {}", item) return - lyrics_matches = [] - album, length = item.album, round(item.length) - for artist, titles in search_pairs(item): - lyrics_matches = [ - self.get_lyrics(artist, title, album, length) - for title in titles - ] - if any(lyrics_matches): - break - - lyrics = "\n\n---\n\n".join(filter(None, lyrics_matches)) - - if lyrics: + if lyrics := self.find_lyrics(item): self.info("🟢 Found lyrics: {0}", item) if translator := self.translator: initial_lyrics = lyrics diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 15ddea450..a20f97faf 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -47,6 +47,7 @@ Default configuration: force: no google_API_key: null google_engine_ID: 009217259823014548361:lndtuqkycfu + print: no sources: [lrclib, google, genius, tekstowo] synced: no @@ -75,6 +76,7 @@ The available options are: - **google_engine_ID**: The custom search engine to use. Default: The `beets custom search engine`_, which gathers an updated list of sources known to be scrapeable. +- **print**: Print lyrics to the console. - **sources**: List of sources to search for lyrics. An asterisk ``*`` expands to all available sources. The ``google`` source will be automatically deactivated if no ``google_API_key`` is setup. diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 39d088860..55fc3a30a 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -25,7 +25,7 @@ from pathlib import Path import pytest from beets.library import Item -from beets.test.helper import PluginMixin +from beets.test.helper import PluginMixin, TestHelper from beetsplug import lyrics from .lyrics_pages import LyricsPage, lyrics_pages @@ -42,6 +42,14 @@ PHRASE_BY_TITLE = { } +@pytest.fixture(scope="module") +def helper(): + helper = TestHelper() + helper.setup_beets() + yield helper + helper.teardown_beets() + + class TestLyricsUtils: @pytest.mark.parametrize( "artist, title", @@ -240,6 +248,27 @@ class TestLyricsPlugin(LyricsPluginMixin): assert last_log assert re.search(expected_log_match, last_log, re.I) + @pytest.mark.parametrize( + "plugin_config, found, expected", + [ + ({}, "new", "old"), + ({"force": True}, "new", "new"), + ({"force": True, "local": True}, "new", "old"), + ({"force": True, "fallback": None}, "", "old"), + ({"force": True, "fallback": ""}, "", ""), + ({"force": True, "fallback": "default"}, "", "default"), + ], + ) + def test_overwrite_config( + self, monkeypatch, helper, lyrics_plugin, found, expected + ): + monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: found) + item = helper.create_item(id=1, lyrics="old") + + lyrics_plugin.add_item_lyrics(item, False) + + assert item.lyrics == expected + class LyricsBackendTest(LyricsPluginMixin): @pytest.fixture @@ -289,8 +318,13 @@ class TestLyricsSources(LyricsBackendTest): def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage): """Test parsed lyrics from each of the configured lyrics pages.""" - lyrics_info = lyrics_plugin.get_lyrics( - lyrics_page.artist, lyrics_page.track_title, "", 186 + lyrics_info = lyrics_plugin.find_lyrics( + Item( + artist=lyrics_page.artist, + title=lyrics_page.track_title, + album="", + length=186.0, + ) ) assert lyrics_info From 43032f7bc7be2eccc71be31ccd0c122a04d7b122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sat, 26 Oct 2024 14:50:22 +0100 Subject: [PATCH 4/6] translations: make sure we do not re-translate --- beetsplug/lyrics.py | 42 ++++++++++++++++++++++++++----------- test/plugins/test_lyrics.py | 15 ++++++++++--- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 21a164b8d..d7a29050e 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -744,6 +744,7 @@ class Google(SearchBackend): class Translator(RequestHandler): TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + remove_translations = partial(re.compile(r" / [^\n]+").sub, "") _log: Logger api_key: str @@ -811,23 +812,45 @@ class Translator(RequestHandler): # only add the space between non-empty timestamps and texts return [" ".join(filter(None, p)) for p in zip(timestamps, texts)] - def translate(self, lyrics: str) -> str: + def translate(self, new_lyrics: str, old_lyrics: str) -> str: """Translate the given lyrics to the target language. + Check old lyrics for existing translations and return them if their + original text matches the new lyrics. This is to avoid translating + the same lyrics multiple times. + If the lyrics are already in the target language or not in any of of the source languages (if configured), they are returned as is. The footer with the source URL is preserved, if present. """ - lyrics_language = langdetect.detect(lyrics).upper() - if lyrics_language == self.to_language or ( - self.from_languages and lyrics_language not in self.from_languages + if ( + " / " in old_lyrics + and self.remove_translations(old_lyrics) == new_lyrics ): - return lyrics + self.info("🔵 Translations already exist") + return old_lyrics - lyrics, *url = lyrics.split("\n\nSource: ") + lyrics_language = langdetect.detect(new_lyrics).upper() + if lyrics_language == self.to_language: + self.info( + "🔵 Lyrics are already in the target language {}", + self.to_language, + ) + return new_lyrics + + if self.from_languages and lyrics_language not in self.from_languages: + self.info( + "🔵 Configuration {} does not permit translating from {}", + self.from_languages, + lyrics_language, + ) + return new_lyrics + + lyrics, *url = new_lyrics.split("\n\nSource: ") with self.handle_request(): translated_lines = self.append_translations(lyrics.splitlines()) + self.info("🟢 Translated lyrics to {}", self.to_language) return "\n\nSource: ".join(["\n".join(translated_lines), *url]) @@ -1065,12 +1088,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): if lyrics := self.find_lyrics(item): self.info("🟢 Found lyrics: {0}", item) if translator := self.translator: - initial_lyrics = lyrics - if (lyrics := translator.translate(lyrics)) != initial_lyrics: - self.info( - "🟢 Added translation to {}", - self.config["translate_to"].get().upper(), - ) + lyrics = translator.translate(lyrics, item.lyrics) else: self.info("🔴 Lyrics not found: {}", item) lyrics = self.config["fallback"].get() diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 55fc3a30a..c4daa17ba 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -583,7 +583,7 @@ class TestTranslation: requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback) @pytest.mark.parametrize( - "initial_lyrics, expected", + "new_lyrics, old_lyrics, expected", [ pytest.param( """ @@ -592,6 +592,7 @@ class TestTranslation: My body wouldn't let me hide it (Hide it) No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) Ridin' through the thunder, lightnin'""", + "", """ [Refrain: Doja Cat] / [Refrain : Doja Cat] Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) @@ -607,6 +608,7 @@ class TestTranslation: [00:01.00] Some more synced lyrics Source: https://lrclib.net/api/123""", + "", """ [00:00.00] Some synced lyrics / Quelques paroles synchronisées [00:00:50] @@ -617,17 +619,24 @@ class TestTranslation: ), pytest.param( "Quelques paroles", + "", "Quelques paroles", id="already in the target language", ), + pytest.param( + "Some lyrics", + "Some lyrics / Some translation", + "Some lyrics / Some translation", + id="already translated", + ), ], ) - def test_translate(self, initial_lyrics, expected): + def test_translate(self, new_lyrics, old_lyrics, expected): plugin = lyrics.LyricsPlugin() bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"]) assert bing.translate( - textwrap.dedent(initial_lyrics) + textwrap.dedent(new_lyrics), old_lyrics ) == textwrap.dedent(expected) From b713d72612f752becd6c9834b11e5fdec2654df6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 13 Jan 2025 01:48:29 +0000 Subject: [PATCH 5/6] translations: use a more distinctive separator I found that the translator would sometimes replace the pipe character with another symbol (maybe it got confused thinking the character is part of the text?). Added spaces around the pipe to make it more clear that it's definitely the separator. --- beetsplug/lyrics.py | 7 +++++-- test/plugins/test_lyrics.py | 18 +++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index d7a29050e..cb48e2424 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -744,6 +744,7 @@ class Google(SearchBackend): class Translator(RequestHandler): TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + SEPARATOR = " | " remove_translations = partial(re.compile(r" / [^\n]+").sub, "") _log: Logger @@ -773,14 +774,16 @@ class Translator(RequestHandler): map the translations back to the original texts. """ unique_texts = list(dict.fromkeys(texts)) + text = self.SEPARATOR.join(unique_texts) data: list[TranslatorAPI.Response] = self.post_json( self.TRANSLATE_URL, headers={"Ocp-Apim-Subscription-Key": self.api_key}, - json=[{"text": "|".join(unique_texts)}], + json=[{"text": text}], params={"api-version": "3.0", "to": self.to_language}, ) - translations = data[0]["translations"][0]["text"].split("|") + translated_text = data[0]["translations"][0]["text"] + translations = translated_text.split(self.SEPARATOR) trans_by_text = dict(zip(unique_texts, translations)) return list(zip(texts, (trans_by_text.get(t, "") for t in texts))) diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index c4daa17ba..74e727099 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -554,23 +554,23 @@ class TestTranslation: if b"Refrain" in request.body: translations = ( "" - "|[Refrain : Doja Cat]" - "|Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 - "|Mon corps ne me laissait pas le cacher (Cachez-le)" - "|Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 - "|Chevauchant à travers le tonnerre, la foudre" + " | [Refrain : Doja Cat]" + " | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 + " | Mon corps ne me laissait pas le cacher (Cachez-le)" + " | Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 + " | Chevauchant à travers le tonnerre, la foudre" ) elif b"00:00.00" in request.body: translations = ( "" - "|[00:00.00] Quelques paroles synchronisées" - "|[00:01.00] Quelques paroles plus synchronisées" + " | [00:00.00] Quelques paroles synchronisées" + " | [00:01.00] Quelques paroles plus synchronisées" ) else: translations = ( "" - "|Quelques paroles synchronisées" - "|Quelques paroles plus synchronisées" + " | Quelques paroles synchronisées" + " | Quelques paroles plus synchronisées" ) return [ From 4da72cb5f8622e97984199209b7126f5907f53d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 10 Feb 2025 02:09:14 +0000 Subject: [PATCH 6/6] Ignore _typing.py coverage --- setup.cfg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ba580f2c9..3e2fee46a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,7 +15,9 @@ markers = data_file = .reports/coverage/data branch = true relative_files = true -omit = beets/test/* +omit = + beets/test/* + beetsplug/_typing.py [coverage:report] precision = 2