diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py
index c6bae9c4..5f9102ba 100644
--- a/fanficfare/adapters/base_adapter.py
+++ b/fanficfare/adapters/base_adapter.py
@@ -698,29 +698,49 @@ class BaseSiteAdapter(Requestable):
# logger.info("Parsing for normalize_text_links failed...")
try:
- for t in soup.findAll(recursive=True):
- for attr in self.get_attr_keys(t):
- if attr not in acceptable_attributes:
- del t[attr] ## strip all tag attributes except acceptable_attributes
+ # python doesn't have a do-while loop.
+ found_empty=True
+ do_resoup=False
+ while found_empty==True:
+ found_empty=False
+ if do_resoup:
+ # re-soup when empty tags removed before looking
+ # for more because multiple 'whitespace' strings
+ # show up differently and doing stripHTML() also
+ # catches <br> etc.
+ soup = BeautifulSoup(unicode(soup),'html5lib')
+ for t in soup.findAll(recursive=True):
+ for attr in self.get_attr_keys(t):
+ if attr not in acceptable_attributes:
+ del t[attr] ## strip all tag attributes except acceptable_attributes
- # these are not acceptable strict XHTML. But we do already have
- # CSS classes of the same names defined
- if t and hasattr(t,'name') and t.name is not None:
- if t.name in self.getConfigList('replace_tags_with_spans',['u']):
- t['class']=t.name
- t.name='span'
- if t.name in ('center'):
- t['class']=t.name
- t.name='div'
- # removes paired, but empty non paragraph tags.
- if t.name not in self.getConfigList('keep_empty_tags',['p','td','th']) and t.string != None and len(t.string.strip()) == 0 :
- t.decompose()
+ if t and hasattr(t,'name') and t.name is not None:
+ # remove script tags across the board.
+ # epub readers (Moon+, FBReader & Aldiko at least)
+ # don't like