Add keep_html_attrs and replace_tags_with_spans features.

This commit is contained in:
Jim Miller 2015-12-13 10:42:42 -06:00
parent 8683c59d80
commit b314c915d4
4 changed files with 65 additions and 8 deletions

View file

@ -266,12 +266,32 @@ keep_summary_html:true
## If set true, any style attributes on tags in the story HTML will be
## kept. Useful for keeping extra colors & formatting from original.
## DEPRECATED - use keep_html_attrs (or add_to_keep_html_attrs) instead.
#keep_style_attr: false
## If set true, any title attributes on tags in the story HTML will be
## kept. Some sites (AO3) use <a title="Translation">Foreign language</a>
## DEPRECATED - use keep_html_attrs (or add_to_keep_html_attrs) instead.
#keep_title_attr: false
## Some attributes cause problems for EBook readers. By default,
## FanFicFare will remove all attributes from all tags except those
## listed in keep_html_attrs. (The only exception is that <img> tags will also
## keep src, alt and longdesc attributes.)
## Example: To add 'style', 'title' and 'align' to the list to keep,
## in your personal.ini [defaults] put:
## add_to_keep_html_attrs:,style,title,align
keep_html_attrs:href,name,class,id
## Tags listed here will be replaced with <span class="tagname">.
## For example: <u>underlined text</u> becomes
## <span class="u">underlined text</span>
## Note that output_css should define the classes .u, .big, etc. for
## the spans to be useful.
## This feature is for replacing old tags deprecated/removed in newer
## HTML and EPUB standards.
replace_tags_with_spans:u,big,small
## If a chapter range was given, use this pattern for the book title.
## replace_metadata and include/exclude will be applied *after* this.
## Set to empty value to disable.
@ -507,6 +527,8 @@ output_css:
.smcap {font-variant: small-caps;}
.u {text-decoration: underline;}
.bold {font-weight: bold;}
.big { font-size: larger; }
.small { font-size: smaller; }
[txt]
## Add URLs since there aren't links.
@ -587,6 +609,8 @@ output_css:
.smcap {font-variant: small-caps;}
.u {text-decoration: underline;}
.bold {font-weight: bold;}
.big { font-size: larger; }
.small { font-size: smaller; }
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.

View file

@ -561,11 +561,13 @@ class BaseSiteAdapter(Configurable):
if not fetch:
fetch=self._fetchUrlRaw
acceptable_attributes = ['href','name','class','id']
acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id'])
if self.getConfig("keep_style_attr"):
acceptable_attributes.append('style')
if self.getConfig("keep_title_attr"):
acceptable_attributes.append('title')
#print("include_images:"+self.getConfig('include_images'))
if self.getConfig('include_images'):
acceptable_attributes.extend(('src','alt','longdesc'))
@ -590,7 +592,7 @@ class BaseSiteAdapter(Configurable):
# these are not acceptable strict XHTML. But we do already have
# CSS classes of the same names defined
if t and hasattr(t,'name') and t.name is not None:
if t.name in ('u'):
if t.name in self.getConfigList('replace_tags_with_spans',['u']):
t['class']=t.name
t.name='span'
if t.name in ('center'):

View file

@ -111,6 +111,8 @@ def get_valid_list_entries():
'authorId',
'authorUrl',
'lastupdate',
'keep_html_attrs',
'replace_tags_with_spans',
])
boollist=['true','false']
@ -273,6 +275,8 @@ def get_valid_keywords():
'join_string_authorHTML',
'keep_style_attr',
'keep_title_attr',
'keep_html_attrs',
'replace_tags_with_spans',
'keep_summary_html',
'logpage_end',
'logpage_entries',
@ -449,15 +453,18 @@ class Configuration(ConfigParser.SafeConfigParser):
return val
# split and strip each.
def get_config_list(self, sections, key):
def get_config_list(self, sections, key, default=[]):
vlist = re.split(r'(?<!\\),',self.get_config(sections,key)) # don't split on \,
vlist = filter( lambda x : x !='', [ v.strip().replace('\,',',') for v in vlist ])
#print "vlist("+key+"):"+str(vlist)
return vlist
if not vlist:
return default
else:
return vlist
# used by adapters & writers, non-convention naming style
def getConfigList(self, key):
return self.get_config_list(self.sectionslist, key)
def getConfigList(self, key, default=[]):
return self.get_config_list(self.sectionslist, key, default)
# Moved here for test_config.
def get_generate_cover_settings(self):
@ -690,8 +697,8 @@ class Configurable(object):
def get_config(self, sections, key, default=""):
return self.configuration.get_config(sections,key,default)
def getConfigList(self, key):
return self.configuration.getConfigList(key)
def getConfigList(self, key, default=[]):
return self.configuration.getConfigList(key,default)
def get_config_list(self, sections, key):
return self.configuration.get_config_list(sections,key)

View file

@ -275,12 +275,32 @@ keep_summary_html:true
## If set true, any style attributes on tags in the story HTML will be
## kept. Useful for keeping extra colors & formatting from original.
## DEPRECATED - use keep_html_attrs (or add_to_keep_html_attrs) instead.
#keep_style_attr: false
## If set true, any title attributes on tags in the story HTML will be
## kept. Some sites (AO3) use <a title="Translation">Foreign language</a>
## DEPRECATED - use keep_html_attrs (or add_to_keep_html_attrs) instead.
#keep_title_attr: false
## Some attributes cause problems for EBook readers. By default,
## FanFicFare will remove all attributes from all tags except those
## listed in keep_html_attrs. (The only exception is that <img> tags will also
## keep src, alt and longdesc attributes.)
## Example: To add 'style', 'title' and 'align' to the list to keep,
## in your personal.ini [defaults] put:
## add_to_keep_html_attrs:,style,title,align
keep_html_attrs:href,name,class,id
## Tags listed here will be replaced with <span class="tagname">.
## For example: <u>underlined text</u> becomes
## <span class="u">underlined text</span>
## Note that output_css should define the classes .u, .big, etc. for
## the spans to be useful.
## This feature is for replacing old tags deprecated/removed in newer
## HTML and EPUB standards.
replace_tags_with_spans:u,big,small
## If a chapter range was given, use this pattern for the book title.
## replace_metadata and include/exclude will be applied *after* this.
## Set to empty value to disable.
@ -511,6 +531,8 @@ output_css:
.smcap {font-variant: small-caps;}
.u {text-decoration: underline;}
.bold {font-weight: bold;}
.big { font-size: larger; }
.small { font-size: smaller; }
[txt]
## Add URLs since there aren't links.
@ -592,6 +614,8 @@ output_css:
.smcap {font-variant: small-caps;}
.u {text-decoration: underline;}
.bold {font-weight: bold;}
.big { font-size: larger; }
.small { font-size: smaller; }
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.