translations: make sure we do not re-translate

This commit is contained in:
Šarūnas Nejus 2024-10-26 14:50:22 +01:00
parent 7893766e4c
commit 43032f7bc7
No known key found for this signature in database
GPG key ID: DD28F6704DBE3435
2 changed files with 42 additions and 15 deletions

View file

@ -744,6 +744,7 @@ class Google(SearchBackend):
class Translator(RequestHandler):
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
remove_translations = partial(re.compile(r" / [^\n]+").sub, "")
_log: Logger
api_key: str
@ -811,23 +812,45 @@ class Translator(RequestHandler):
# only add the space between non-empty timestamps and texts
return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]
def translate(self, lyrics: str) -> str:
def translate(self, new_lyrics: str, old_lyrics: str) -> str:
"""Translate the given lyrics to the target language.
Check old lyrics for existing translations and return them if their
original text matches the new lyrics. This is to avoid translating
the same lyrics multiple times.
If the lyrics are already in the target language or not in any of
the source languages (if configured), they are returned as is.
The footer with the source URL is preserved, if present.
"""
lyrics_language = langdetect.detect(lyrics).upper()
if lyrics_language == self.to_language or (
self.from_languages and lyrics_language not in self.from_languages
if (
" / " in old_lyrics
and self.remove_translations(old_lyrics) == new_lyrics
):
return lyrics
self.info("🔵 Translations already exist")
return old_lyrics
lyrics, *url = lyrics.split("\n\nSource: ")
lyrics_language = langdetect.detect(new_lyrics).upper()
if lyrics_language == self.to_language:
self.info(
"🔵 Lyrics are already in the target language {}",
self.to_language,
)
return new_lyrics
if self.from_languages and lyrics_language not in self.from_languages:
self.info(
"🔵 Configuration {} does not permit translating from {}",
self.from_languages,
lyrics_language,
)
return new_lyrics
lyrics, *url = new_lyrics.split("\n\nSource: ")
with self.handle_request():
translated_lines = self.append_translations(lyrics.splitlines())
self.info("🟢 Translated lyrics to {}", self.to_language)
return "\n\nSource: ".join(["\n".join(translated_lines), *url])
@ -1065,12 +1088,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
if lyrics := self.find_lyrics(item):
self.info("🟢 Found lyrics: {0}", item)
if translator := self.translator:
initial_lyrics = lyrics
if (lyrics := translator.translate(lyrics)) != initial_lyrics:
self.info(
"🟢 Added translation to {}",
self.config["translate_to"].get().upper(),
)
lyrics = translator.translate(lyrics, item.lyrics)
else:
self.info("🔴 Lyrics not found: {}", item)
lyrics = self.config["fallback"].get()

View file

@ -583,7 +583,7 @@ class TestTranslation:
requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback)
@pytest.mark.parametrize(
"initial_lyrics, expected",
"new_lyrics, old_lyrics, expected",
[
pytest.param(
"""
@ -592,6 +592,7 @@ class TestTranslation:
My body wouldn't let me hide it (Hide it)
No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold)
Ridin' through the thunder, lightnin'""",
"",
"""
[Refrain: Doja Cat] / [Refrain : Doja Cat]
Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)
@ -607,6 +608,7 @@ class TestTranslation:
[00:01.00] Some more synced lyrics
Source: https://lrclib.net/api/123""",
"",
"""
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
[00:00:50]
@ -617,17 +619,24 @@ class TestTranslation:
),
pytest.param(
"Quelques paroles",
"",
"Quelques paroles",
id="already in the target language",
),
pytest.param(
"Some lyrics",
"Some lyrics / Some translation",
"Some lyrics / Some translation",
id="already translated",
),
],
)
def test_translate(self, initial_lyrics, expected):
def test_translate(self, new_lyrics, old_lyrics, expected):
plugin = lyrics.LyricsPlugin()
bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"])
assert bing.translate(
textwrap.dedent(initial_lyrics)
textwrap.dedent(new_lyrics), old_lyrics
) == textwrap.dedent(expected)