mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-27 11:16:48 +01:00
Add setting remove_tags:script,style, replacing the hard-coded removal of script tags and adding removal of style tags. Closes #553.
This commit is contained in:
parent
0055978a57
commit
cdf7db07b2
5 changed files with 25 additions and 6 deletions
|
|
@ -385,6 +385,12 @@ replace_tags_with_spans:u,big,small
|
|||
## setting can adjust which tags are kept.
|
||||
keep_empty_tags:p,td,th
|
||||
|
||||
## By default, script and style tags are removed from chapter text as
|
||||
## part of cleaning up the source HTML. Several book readers were found
|
||||
## that didn't treat those tags in <body> correctly. Set to empty if
|
||||
## you want to keep those tags.
|
||||
remove_tags:script,style
|
||||
|
||||
## If a chapter range was given, use this pattern for the book title.
|
||||
## replace_metadata and include/exclude will be applied *after* this.
|
||||
## Set to empty value to disable.
|
||||
|
|
@ -1682,7 +1688,7 @@ legend_spoilers:true
|
|||
show_spoiler_tags:false
|
||||
## don't fetch covers marked as nsfw. covers for fiction.live can't be pornographic, but can get very close.
|
||||
show_nsfw_cover_images:false
|
||||
## displays the timestamps on the story chunks, showing when each part went live.
|
||||
## displays the timestamps on the story chunks, showing when each part went live.
|
||||
show_timestamps:false
|
||||
|
||||
## site has more original than fan fiction
|
||||
|
|
|
|||
|
|
@ -409,6 +409,10 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
|
|||
text=u'''
|
||||
<div>
|
||||
<h3 extra="value">Chapter title from site</h3>
|
||||
<style>
|
||||
p { color: red; }
|
||||
body { color:blue; margin: 5%; }
|
||||
</style>
|
||||
<p>chapter URL:'''+url+'''</p>
|
||||
<p style="color:blue;">Timestamp:'''+datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")+'''</p>
|
||||
<p>Lorem '''+self.crazystring+u''' <i>italics</i>, <b>bold</b>, <u>underline</u> consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||
|
|
|
|||
|
|
@ -483,7 +483,7 @@ class BaseSiteAdapter(Configurable):
|
|||
else:
|
||||
## remove all img tags entirely
|
||||
for img in soup.find_all('img'):
|
||||
img.extract()
|
||||
img.decompose()
|
||||
|
||||
for attr in self.get_attr_keys(soup):
|
||||
if attr not in acceptable_attributes:
|
||||
|
|
@ -519,11 +519,13 @@ class BaseSiteAdapter(Configurable):
|
|||
t.name='div'
|
||||
# removes paired but empty non-paragraph tags.
|
||||
if t.name not in self.getConfigList('keep_empty_tags',['p','td','th']) and t.string != None and len(t.string.strip()) == 0 :
|
||||
t.extract()
|
||||
t.decompose()
|
||||
|
||||
# remove script tags across the board.
|
||||
if t.name=='script':
|
||||
t.extract()
|
||||
# epub readers (Moon+, FBReader & Aldiko at least)
|
||||
# don't like <style> tags in body.
|
||||
if t.name in self.getConfigList('remove_tags',['script','style']):
|
||||
t.decompose()
|
||||
|
||||
except AttributeError as ae:
|
||||
if "%s"%ae != "'NoneType' object has no attribute 'next_element'":
|
||||
|
|
|
|||
|
|
@ -408,6 +408,7 @@ def get_valid_keywords():
|
|||
'keep_html_attrs',
|
||||
'replace_tags_with_spans',
|
||||
'keep_empty_tags',
|
||||
'remove_tags',
|
||||
'keep_summary_html',
|
||||
'logpage_end',
|
||||
'logpage_entries',
|
||||
|
|
|
|||
|
|
@ -388,6 +388,12 @@ replace_tags_with_spans:u,big,small
|
|||
## setting can adjust which tags are kept.
|
||||
keep_empty_tags:p,td,th
|
||||
|
||||
## By default, script and style tags are removed from chapter text as
|
||||
## part of cleaning up the source HTML. Several book readers were found
|
||||
## that didn't treat those tags in <body> correctly. Set to empty if
|
||||
## you want to keep those tags.
|
||||
remove_tags:script,style
|
||||
|
||||
## If a chapter range was given, use this pattern for the book title.
|
||||
## replace_metadata and include/exclude will be applied *after* this.
|
||||
## Set to empty value to disable.
|
||||
|
|
@ -1713,7 +1719,7 @@ legend_spoilers:true
|
|||
show_spoiler_tags:false
|
||||
## don't fetch covers marked as nsfw. covers for fiction.live can't be pornographic, but can get very close.
|
||||
show_nsfw_cover_images:false
|
||||
## displays the timestamps on the story chunks, showing when each part went live.
|
||||
## displays the timestamps on the story chunks, showing when each part went live.
|
||||
show_timestamps:false
|
||||
|
||||
## site has more original than fan fiction
|
||||
|
|
|
|||
Loading…
Reference in a new issue