mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-17 06:13:13 +01:00
222 lines
7.1 KiB
Python
222 lines
7.1 KiB
Python
# Copyright 2021 Sidney Markowitz All Rights Reserved.
|
|
# Distributed under MIT license.
|
|
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
|
|
|
from .dictionary import BrotliDictionary
|
|
"""
|
|
Transformations on dictionary words
|
|
|
|
"""
|
|
|
|
|
|
class Transform:
|
|
def __init__(self, prefix, transform, suffix):
|
|
self.prefix = bytearray(prefix)
|
|
self.transform = transform
|
|
self.suffix = bytearray(suffix)
|
|
|
|
@staticmethod
|
|
def transformDictionaryWord(dst, idx, word, length, transform):
|
|
prefix = kTransforms[transform].prefix
|
|
suffix = kTransforms[transform].suffix
|
|
t = kTransforms[transform].transform
|
|
skip = t < (0 if kOmitFirst1 else (t - (kOmitFirst1 - 1)))
|
|
start_idx = idx
|
|
if skip > length:
|
|
skip = length
|
|
|
|
prefix_pos = 0
|
|
while prefix_pos < len(prefix):
|
|
dst[idx] = prefix[prefix_pos]
|
|
idx += 1
|
|
prefix_pos += 1
|
|
|
|
word += skip
|
|
length -= skip
|
|
|
|
if t <= kOmitLast9:
|
|
length -= t
|
|
|
|
for i in range(0, length):
|
|
dst[idx] = BrotliDictionary.dictionary[word + i]
|
|
idx += 1
|
|
|
|
uppercase = idx - length
|
|
|
|
if t == kUppercaseFirst:
|
|
_to_upper_case(dst, uppercase)
|
|
elif t == kUppercaseAll:
|
|
while length > 0:
|
|
step = _to_upper_case(dst, uppercase)
|
|
uppercase += step
|
|
length -= step
|
|
|
|
suffix_pos = 0
|
|
while suffix_pos < len(suffix):
|
|
dst[idx] = suffix[suffix_pos]
|
|
idx += 1
|
|
suffix_pos += 1
|
|
|
|
return idx - start_idx
|
|
|
|
|
|
kIdentity = 0
|
|
kOmitLast1 = 1
|
|
kOmitLast2 = 2
|
|
kOmitLast3 = 3
|
|
kOmitLast4 = 4
|
|
kOmitLast5 = 5
|
|
kOmitLast6 = 6
|
|
kOmitLast7 = 7
|
|
kOmitLast8 = 8
|
|
kOmitLast9 = 9
|
|
kUppercaseFirst = 10
|
|
kUppercaseAll = 11
|
|
kOmitFirst1 = 12
|
|
kOmitFirst2 = 13
|
|
kOmitFirst3 = 14
|
|
kOmitFirst4 = 15
|
|
kOmitFirst5 = 16
|
|
kOmitFirst6 = 17
|
|
kOmitFirst7 = 18
|
|
kOmitFirst8 = 19
|
|
kOmitFirst9 = 20
|
|
|
|
kTransforms = [
|
|
Transform(b"", kIdentity, b""),
|
|
Transform(b"", kIdentity, b" "),
|
|
Transform(b" ", kIdentity, b" "),
|
|
Transform(b"", kOmitFirst1, b""),
|
|
Transform(b"", kUppercaseFirst, b" "),
|
|
Transform(b"", kIdentity, b" the "),
|
|
Transform(b" ", kIdentity, b""),
|
|
Transform(b"s ", kIdentity, b" "),
|
|
Transform(b"", kIdentity, b" of "),
|
|
Transform(b"", kUppercaseFirst, b""),
|
|
Transform(b"", kIdentity, b" and "),
|
|
Transform(b"", kOmitFirst2, b""),
|
|
Transform(b"", kOmitLast1, b""),
|
|
Transform(b", ", kIdentity, b" "),
|
|
Transform(b"", kIdentity, b", "),
|
|
Transform(b" ", kUppercaseFirst, b" "),
|
|
Transform(b"", kIdentity, b" in "),
|
|
Transform(b"", kIdentity, b" to "),
|
|
Transform(b"e ", kIdentity, b" "),
|
|
Transform(b"", kIdentity, b"\""),
|
|
Transform(b"", kIdentity, b"."),
|
|
Transform(b"", kIdentity, b"\">"),
|
|
Transform(b"", kIdentity, b"\n"),
|
|
Transform(b"", kOmitLast3, b""),
|
|
Transform(b"", kIdentity, b"]"),
|
|
Transform(b"", kIdentity, b" for "),
|
|
Transform(b"", kOmitFirst3, b""),
|
|
Transform(b"", kOmitLast2, b""),
|
|
Transform(b"", kIdentity, b" a "),
|
|
Transform(b"", kIdentity, b" that "),
|
|
Transform(b" ", kUppercaseFirst, b""),
|
|
Transform(b"", kIdentity, b". "),
|
|
Transform(b".", kIdentity, b""),
|
|
Transform(b" ", kIdentity, b", "),
|
|
Transform(b"", kOmitFirst4, b""),
|
|
Transform(b"", kIdentity, b" with "),
|
|
Transform(b"", kIdentity, b"'"),
|
|
Transform(b"", kIdentity, b" from "),
|
|
Transform(b"", kIdentity, b" by "),
|
|
Transform(b"", kOmitFirst5, b""),
|
|
Transform(b"", kOmitFirst6, b""),
|
|
Transform(b" the ", kIdentity, b""),
|
|
Transform(b"", kOmitLast4, b""),
|
|
Transform(b"", kIdentity, b". The "),
|
|
Transform(b"", kUppercaseAll, b""),
|
|
Transform(b"", kIdentity, b" on "),
|
|
Transform(b"", kIdentity, b" as "),
|
|
Transform(b"", kIdentity, b" is "),
|
|
Transform(b"", kOmitLast7, b""),
|
|
Transform(b"", kOmitLast1, b"ing "),
|
|
Transform(b"", kIdentity, b"\n\t"),
|
|
Transform(b"", kIdentity, b":"),
|
|
Transform(b" ", kIdentity, b". "),
|
|
Transform(b"", kIdentity, b"ed "),
|
|
Transform(b"", kOmitFirst9, b""),
|
|
Transform(b"", kOmitFirst7, b""),
|
|
Transform(b"", kOmitLast6, b""),
|
|
Transform(b"", kIdentity, b"("),
|
|
Transform(b"", kUppercaseFirst, b", "),
|
|
Transform(b"", kOmitLast8, b""),
|
|
Transform(b"", kIdentity, b" at "),
|
|
Transform(b"", kIdentity, b"ly "),
|
|
Transform(b" the ", kIdentity, b" of "),
|
|
Transform(b"", kOmitLast5, b""),
|
|
Transform(b"", kOmitLast9, b""),
|
|
Transform(b" ", kUppercaseFirst, b", "),
|
|
Transform(b"", kUppercaseFirst, b"\""),
|
|
Transform(b".", kIdentity, b"("),
|
|
Transform(b"", kUppercaseAll, b" "),
|
|
Transform(b"", kUppercaseFirst, b"\">"),
|
|
Transform(b"", kIdentity, b"=\""),
|
|
Transform(b" ", kIdentity, b"."),
|
|
Transform(b".com/", kIdentity, b""),
|
|
Transform(b" the ", kIdentity, b" of the "),
|
|
Transform(b"", kUppercaseFirst, b"'"),
|
|
Transform(b"", kIdentity, b". This "),
|
|
Transform(b"", kIdentity, b","),
|
|
Transform(b".", kIdentity, b" "),
|
|
Transform(b"", kUppercaseFirst, b"("),
|
|
Transform(b"", kUppercaseFirst, b"."),
|
|
Transform(b"", kIdentity, b" not "),
|
|
Transform(b" ", kIdentity, b"=\""),
|
|
Transform(b"", kIdentity, b"er "),
|
|
Transform(b" ", kUppercaseAll, b" "),
|
|
Transform(b"", kIdentity, b"al "),
|
|
Transform(b" ", kUppercaseAll, b""),
|
|
Transform(b"", kIdentity, b"='"),
|
|
Transform(b"", kUppercaseAll, b"\""),
|
|
Transform(b"", kUppercaseFirst, b". "),
|
|
Transform(b" ", kIdentity, b"("),
|
|
Transform(b"", kIdentity, b"ful "),
|
|
Transform(b" ", kUppercaseFirst, b". "),
|
|
Transform(b"", kIdentity, b"ive "),
|
|
Transform(b"", kIdentity, b"less "),
|
|
Transform(b"", kUppercaseAll, b"'"),
|
|
Transform(b"", kIdentity, b"est "),
|
|
Transform(b" ", kUppercaseFirst, b"."),
|
|
Transform(b"", kUppercaseAll, b"\">"),
|
|
Transform(b" ", kIdentity, b"='"),
|
|
Transform(b"", kUppercaseFirst, b","),
|
|
Transform(b"", kIdentity, b"ize "),
|
|
Transform(b"", kUppercaseAll, b"."),
|
|
Transform(b"\xc2\xa0", kIdentity, b""),
|
|
Transform(b" ", kIdentity, b","),
|
|
Transform(b"", kUppercaseFirst, b"=\""),
|
|
Transform(b"", kUppercaseAll, b"=\""),
|
|
Transform(b"", kIdentity, b"ous "),
|
|
Transform(b"", kUppercaseAll, b", "),
|
|
Transform(b"", kUppercaseFirst, b"='"),
|
|
Transform(b" ", kUppercaseFirst, b","),
|
|
Transform(b" ", kUppercaseAll, b"=\""),
|
|
Transform(b" ", kUppercaseAll, b", "),
|
|
Transform(b"", kUppercaseAll, b","),
|
|
Transform(b"", kUppercaseAll, b"("),
|
|
Transform(b"", kUppercaseAll, b". "),
|
|
Transform(b" ", kUppercaseAll, b"."),
|
|
Transform(b"", kUppercaseAll, b"='"),
|
|
Transform(b" ", kUppercaseAll, b". "),
|
|
Transform(b" ", kUppercaseFirst, b"=\""),
|
|
Transform(b" ", kUppercaseAll, b"='"),
|
|
Transform(b" ", kUppercaseFirst, b"='")
|
|
]
|
|
|
|
kNumTransforms = len(kTransforms)
|
|
|
|
|
|
def _to_upper_case(p, i):
|
|
"""Overly simplified model of uppercase in utf-8, but what RFC7932 specifies to use"""
|
|
if p[i] < 0xc0:
|
|
if 97 <= p[i] <= 122:
|
|
p[i] ^= 32
|
|
return 1
|
|
if p[i] < 0xe0:
|
|
p[i + 1] ^= 32
|
|
return 2
|
|
p[i + 2] ^= 5
|
|
return 3
|