mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-09 01:14:36 +02:00
EPUB Output: Strip <form> tags since ADE runs screaming when it sees one. Fixes #2029 (IHT resetting P505)
This commit is contained in:
parent
a52286c594
commit
74486fc40d
4 changed files with 13 additions and 4 deletions
|
|
@@ -197,6 +197,9 @@ def fix_markup(self):
|
|||
if not tag.text and not tag.get('src', False):
|
||||
tag.getparent().remove(tag)
|
||||
|
||||
for tag in self.root.xpath('//form'):
|
||||
tag.getparent().remove(tag)
|
||||
|
||||
if self.opts.linearize_tables:
|
||||
for tag in self.root.xpath('//table | //tr | //th | //td'):
|
||||
tag.tag = 'div'
|
||||
|
|
|
|||
|
|
@@ -98,7 +98,7 @@ def populate_from_feed(self, feed, title=None, oldest_article=7,
|
|||
if len(self.articles) >= max_articles_per_feed:
|
||||
break
|
||||
self.parse_article(item)
|
||||
|
||||
|
||||
|
||||
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
|
||||
max_articles_per_feed=100):
|
||||
|
|
@@ -156,7 +156,6 @@ def parse_article(self, item):
|
|||
content = None
|
||||
if not link and not content:
|
||||
return
|
||||
|
||||
article = Article(id, title, link, description, published, content)
|
||||
delta = datetime.utcnow() - article.utctime
|
||||
if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:
|
||||
|
|
|
|||
|
|
@@ -1012,7 +1012,8 @@ def parse_feeds(self):
|
|||
feed.description = unicode(err)
|
||||
parsed_feeds.append(feed)
|
||||
self.log_exception(msg)
|
||||
|
||||
|
||||
|
||||
return parsed_feeds
|
||||
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@@ -3,6 +3,7 @@
|
|||
'''
|
||||
iht.com
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
|
@@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe):
|
|||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':'footer'})]
|
||||
remove_tags = [dict(name='div', attrs={'class':'footer'}),
|
||||
dict(name=['form'])]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<!-- webtrends.*', re.DOTALL),
|
||||
lambda m:'</body></html>')
|
||||
]
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||
|
||||
feeds = [
|
||||
|
|
|
|||
Loading…
Reference in a new issue