base_adapter was removing empty paragraphs used as section breaks on some sites (e.g. AO3)

This commit is contained in:
asbjorn grandt 2013-11-01 08:08:44 +01:00
parent 3b04b6ad61
commit c6ddd8e6d7

View file

@ -352,8 +352,8 @@ class BaseSiteAdapter(Configurable):
if t.name in ('center'):
t['class']=t.name
t.name='div'
# removes paired, but empty tags.
if t.string != None and len(t.string.strip()) == 0 :
# removes paired, but empty non paragraph tags.
if t.name not in ('p') and t.string != None and len(t.string.strip()) == 0 :
t.extract()
retval = soup.__str__('utf8').decode('utf-8')