mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-09 02:53:38 +02:00
Handle HTML entities in the builtin S&R funcs
This commit is contained in:
parent
ef3509ebc5
commit
48a701012c
1 changed file with 8 additions and 4 deletions
|
|
@ -9,7 +9,7 @@
|
|||
import re, os
|
||||
from bisect import bisect
|
||||
|
||||
from calibre import guess_type as _guess_type
|
||||
from calibre import guess_type as _guess_type, prepare_string_for_xml, replace_entities
|
||||
|
||||
def guess_type(x):
|
||||
return _guess_type(x)[0] or 'application/octet-stream'
|
||||
|
|
@ -172,12 +172,16 @@ def parse_css(data, fname='<string>', is_declaration=False, decode=None, log_lev
|
|||
data = parser.parseString(data, href=fname, validate=False)
|
||||
return data
|
||||
|
||||
def apply_func_to_match_groups(match, func=icu_upper):
|
||||
def handle_entities(text, func):
|
||||
return prepare_string_for_xml(func(replace_entities(text)))
|
||||
|
||||
def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_entities):
|
||||
'''Apply the specified function to individual groups in the match object (the result of re.search()) or
|
||||
the whole match if no groups were defined. Returns the replaced string.'''
|
||||
found_groups = False
|
||||
i = 0
|
||||
parts, pos = [], match.start()
|
||||
f = lambda text:handle_entities(text, func)
|
||||
while True:
|
||||
i += 1
|
||||
try:
|
||||
|
|
@ -187,10 +191,10 @@ def apply_func_to_match_groups(match, func=icu_upper):
|
|||
found_groups = True
|
||||
if start > -1:
|
||||
parts.append(match.string[pos:start])
|
||||
parts.append(func(match.string[start:end]))
|
||||
parts.append(f(match.string[start:end]))
|
||||
pos = end
|
||||
if not found_groups:
|
||||
return func(match.group())
|
||||
return f(match.group())
|
||||
parts.append(match.string[pos:match.end()])
|
||||
return ''.join(parts)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue