translations: use a more distinctive separator

I found that the translator would sometimes replace the pipe character
with another symbol (perhaps because it mistook the character for part
of the text).

Added spaces around the pipe to make it clearer that it is the
separator.
This commit is contained in:
Šarūnas Nejus 2025-01-13 01:48:29 +00:00
parent 43032f7bc7
commit b713d72612
No known key found for this signature in database
GPG key ID: DD28F6704DBE3435
2 changed files with 14 additions and 11 deletions

View file

@ -744,6 +744,7 @@ class Google(SearchBackend):
class Translator(RequestHandler):
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
SEPARATOR = " | "
remove_translations = partial(re.compile(r" / [^\n]+").sub, "")
_log: Logger
@ -773,14 +774,16 @@ class Translator(RequestHandler):
map the translations back to the original texts.
"""
unique_texts = list(dict.fromkeys(texts))
text = self.SEPARATOR.join(unique_texts)
data: list[TranslatorAPI.Response] = self.post_json(
self.TRANSLATE_URL,
headers={"Ocp-Apim-Subscription-Key": self.api_key},
json=[{"text": "|".join(unique_texts)}],
json=[{"text": text}],
params={"api-version": "3.0", "to": self.to_language},
)
translations = data[0]["translations"][0]["text"].split("|")
translated_text = data[0]["translations"][0]["text"]
translations = translated_text.split(self.SEPARATOR)
trans_by_text = dict(zip(unique_texts, translations))
return list(zip(texts, (trans_by_text.get(t, "") for t in texts)))

View file

@ -554,23 +554,23 @@ class TestTranslation:
if b"Refrain" in request.body:
translations = (
""
"|[Refrain : Doja Cat]"
"|Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501
"|Mon corps ne me laissait pas le cacher (Cachez-le)"
"|Quoi quil arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501
"|Chevauchant à travers le tonnerre, la foudre"
" | [Refrain : Doja Cat]"
" | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501
" | Mon corps ne me laissait pas le cacher (Cachez-le)"
" | Quoi quil arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501
" | Chevauchant à travers le tonnerre, la foudre"
)
elif b"00:00.00" in request.body:
translations = (
""
"|[00:00.00] Quelques paroles synchronisées"
"|[00:01.00] Quelques paroles plus synchronisées"
" | [00:00.00] Quelques paroles synchronisées"
" | [00:01.00] Quelques paroles plus synchronisées"
)
else:
translations = (
""
"|Quelques paroles synchronisées"
"|Quelques paroles plus synchronisées"
" | Quelques paroles synchronisées"
" | Quelques paroles plus synchronisées"
)
return [