mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-01-18 23:43:50 +01:00
Revert previous changes, now looking for entities in unwrapping rule
This commit is contained in:
parent
66b443adc5
commit
569b84e1cb
1 changed file with 3 additions and 1 deletion
|
|
@@ -17,6 +17,8 @@
|
|||
result_exceptions = {
|
||||
u'<' : '&lt;',
|
||||
u'>' : '&gt;',
|
||||
u"'" : '&apos;',
|
||||
u'"' : '&quot;',
|
||||
u'&' : '&amp;',
|
||||
})
|
||||
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
||||
|
|
@@ -349,7 +351,7 @@ def __call__(self, html, remove_special_chars=None,
|
|||
# print "The pdf line length returned is " + str(length)
|
||||
end_rules.append(
|
||||
# Un wrap using punctuation
|
||||
(re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
|
||||
(re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
|
||||
)
|
||||
|
||||
for rule in self.PREPROCESS + start_rules:
|
||||
|
|
|
|||
Loading…
Reference in a new issue