diff --git a/src/calibre/ebooks/BeautifulSoup.py b/src/calibre/ebooks/BeautifulSoup.py index c19f4c0a11..ecdfec486b 100644 --- a/src/calibre/ebooks/BeautifulSoup.py +++ b/src/calibre/ebooks/BeautifulSoup.py @@ -972,6 +972,7 @@ class BeautifulStoneSoup(Tag, SGMLParser): NESTABLE_TAGS = {} RESET_NESTING_TAGS = {} QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = frozenset() MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), lambda x: x.group(1) + ' />'), @@ -1155,7 +1156,10 @@ def pushTag(self, tag): def endData(self, containerClass=NavigableString): if self.currentData: currentData = ''.join(self.currentData) - if not currentData.translate(self.STRIP_ASCII_SPACES): + # Changed by Kovid to not clobber whitespace inside
<pre> tags and the like
+ if ( (not currentData.translate(self.STRIP_ASCII_SPACES)) and (
+ not frozenset(tag.name for tag in self.tagStack).intersection(
+ self.PRESERVE_WHITESPACE_TAGS))):
if '\n' in currentData:
currentData = '\n'
else:
@@ -1443,6 +1447,8 @@ def __init__(self, *args, **kwargs):
['br' , 'hr', 'input', 'img', 'meta',
'spacer', 'link', 'frame', 'base'])
+ PRESERVE_WHITESPACE_TAGS = frozenset(('pre', 'textarea'))
+
QUOTE_TAGS = {'script' : None, 'textarea' : None}
#According to the HTML standard, each of these inline tags can