mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Remember each link's original href in a data-orighref attribute when the epub writer's internalize_text_links rewrites it, so that chapters inserted earlier in the story don't break existing internal links.
This commit is contained in:
parent
3488d35c1f
commit
5e0a036814
5 changed files with 26 additions and 6 deletions
|
|
@ -337,11 +337,13 @@ keep_summary_html:true
|
|||
## Some attributes cause problems for EBook readers. By default,
|
||||
## FanFicFare will remove all attributes except the ones specified
|
||||
## from all tags. (The only exception is that <img> tags will also
|
||||
## keep src, alt and longdesc attributes.)
|
||||
## keep src, alt and longdesc attributes. data-orighref is used by
|
||||
## internalize_text_links to preserve links when chapters are
|
||||
## inserted.)
|
||||
## Example: To add 'style', 'title' and 'align' to the list to keep,
|
||||
## in your personal.ini [defaults] put:
|
||||
## add_to_keep_html_attrs:,style,title,align
|
||||
keep_html_attrs:href,name,class,id,colspan,rowspan
|
||||
keep_html_attrs:href,name,class,id,colspan,rowspan,data-orighref
|
||||
|
||||
## Tags listed here will be replaced with <span class="tagname">.
|
||||
## For example: <u>underlined text</u> becomes
|
||||
|
|
|
|||
|
|
@ -451,7 +451,7 @@ class BaseSiteAdapter(Configurable):
|
|||
if not fetch:
|
||||
fetch=self._fetchUrlRaw
|
||||
|
||||
acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id'])
|
||||
acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id','data-orighref'])
|
||||
|
||||
if self.getConfig("keep_style_attr"):
|
||||
acceptable_attributes.append('style')
|
||||
|
|
|
|||
|
|
@ -340,11 +340,13 @@ keep_summary_html:true
|
|||
## Some attributes cause problems for EBook readers. By default,
|
||||
## FanFicFare will remove all attributes except the ones specified
|
||||
## from all tags. (The only exception is that <img> tags will also
|
||||
## keep src, alt and longdesc attributes.)
|
||||
## keep src, alt and longdesc attributes. data-orighref is used by
|
||||
## internalize_text_links to preserve links when chapters are
|
||||
## inserted.)
|
||||
## Example: To add 'style', 'title' and 'align' to the list to keep,
|
||||
## in your personal.ini [defaults] put:
|
||||
## add_to_keep_html_attrs:,style,title,align
|
||||
keep_html_attrs:href,name,class,id,colspan,rowspan
|
||||
keep_html_attrs:href,name,class,id,colspan,rowspan,data-orighref
|
||||
|
||||
## Tags listed here will be replaced with <span class="tagname">.
|
||||
## For example: <u>underlined text</u> becomes
|
||||
|
|
|
|||
|
|
@ -700,14 +700,27 @@ div { margin: 0pt; padding: 0pt; }
|
|||
CHAPTER_END = self.EPUB_CHAPTER_END
|
||||
|
||||
for index, chap in enumerate(self.story.getChapters()): # (url,title,html)
|
||||
logger.debug("chapter:%s %s %s"%(len(chap['html']), chap['title'],chap['url']))
|
||||
if chap['html']:
|
||||
chap_data = chap['html']
|
||||
if self.getConfig('internalize_text_links'):
|
||||
soup = bs4.BeautifulSoup(chap['html'],'html5lib')
|
||||
changed=False
|
||||
for alink in soup.find_all('a'):
|
||||
if alink.has_attr('href') and alink['href'] in chapurlmap:
|
||||
## Chapters can be inserted in the middle
|
||||
## which can break existing internal links.
|
||||
## So let's save the original href and update.
|
||||
logger.debug("found %s"%alink)
|
||||
if alink.has_attr('data-orighref') and alink['data-orighref'] in chapurlmap:
|
||||
alink['href']=chapurlmap[alink['data-orighref']]
|
||||
logger.debug("set1 %s"%alink)
|
||||
changed=True
|
||||
elif alink.has_attr('href') and alink['href'] in chapurlmap:
|
||||
if not alink['href'].startswith('file'):
|
||||
# only save orig href if not already internal.
|
||||
alink['data-orighref']=alink['href']
|
||||
alink['href']=chapurlmap[alink['href']]
|
||||
logger.debug("set2 %s"%alink)
|
||||
changed=True
|
||||
if changed:
|
||||
chap_data = unicode(soup)
|
||||
|
|
@ -734,6 +747,7 @@ div { margin: 0pt; padding: 0pt; }
|
|||
# (200k+)
|
||||
fullhtml = re.sub(r'(</p>|<br ?/>)\n*',r'\1\n',fullhtml)
|
||||
|
||||
logger.debug("write OEBPS/file%s.xhtml"%chap['index04'])
|
||||
outputepub.writestr("OEBPS/file%s.xhtml"%chap['index04'],fullhtml.encode('utf-8'))
|
||||
del fullhtml
|
||||
|
||||
|
|
|
|||
|
|
@ -146,6 +146,8 @@ ${output_css}
|
|||
if chap['html']:
|
||||
chap_data = chap['html']
|
||||
if self.getConfig('internalize_text_links'):
|
||||
# html doesn't need data-orighref because it
|
||||
# doesn't do updates.
|
||||
soup = bs4.BeautifulSoup(chap['html'],'html5lib')
|
||||
changed=False
|
||||
for alink in soup.find_all('a'):
|
||||
|
|
|
|||
Loading…
Reference in a new issue