mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-09 05:32:57 +02:00
Change the line length amount to work with line lengths that have had duplicates removed. Enhance the unwrapping regex to account for more cases.
This commit is contained in:
parent
e426c9e60d
commit
3a87f0d065
1 changed files with 2 additions and 2 deletions
|
|
@ -183,12 +183,12 @@ def __call__(self, html, remove_special_chars=None):
|
|||
elif self.is_book_designer(html):
|
||||
rules = self.BOOK_DESIGNER
|
||||
elif self.is_pdftohtml(html):
|
||||
length = line_length(html, .3)
|
||||
length = line_length(html, .5)
|
||||
line_length_rules = []
|
||||
if length:
|
||||
line_length_rules = [
|
||||
# Un wrap using punctuation
|
||||
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % length, re.UNICODE), wrap_lines),
|
||||
(re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
|
||||
]
|
||||
|
||||
rules = self.PDFTOHTML + line_length_rules
|
||||
|
|
|
|||
Loading…
Reference in a new issue