diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 668bc055..66891775 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -337,11 +337,13 @@ keep_summary_html:true ## Some attributes cause problems for EBook readers. By default, ## FanFicFare will remove all attributes except the ones specified ## from all tags. (The only exception is that <img> tags will also -## keep src, alt and longdesc attributes.) +## keep src, alt and longdesc attributes. data-orighref is used by +## internalize_text_links to preserve links when chapters are +## inserted.) ## Example: To add 'style', 'title' and 'align' to the list to keep, ## in your personal.ini [defaults] put: ## add_to_keep_html_attrs:,style,title,align -keep_html_attrs:href,name,class,id,colspan,rowspan +keep_html_attrs:href,name,class,id,colspan,rowspan,data-orighref ## Tags listed here will be replaced with <span class="tagname">. ## For example: <u>underlined text</u> becomes diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py index 73969478..3310ebff 100644 --- a/fanficfare/adapters/base_adapter.py +++ b/fanficfare/adapters/base_adapter.py @@ -451,7 +451,7 @@ class BaseSiteAdapter(Configurable): if not fetch: fetch=self._fetchUrlRaw - acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id']) + acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id','data-orighref']) if self.getConfig("keep_style_attr"): acceptable_attributes.append('style') diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 5dffb2c9..47364b9b 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -340,11 +340,13 @@ keep_summary_html:true ## Some attributes cause problems for EBook readers. By default, ## FanFicFare will remove all attributes except the ones specified ## from all tags. (The only exception is that <img> tags will also -## keep src, alt and longdesc attributes.) 
+## keep src, alt and longdesc attributes. data-orighref is used by +## internalize_text_links to preserve links when chapters are +## inserted.) ## Example: To add 'style', 'title' and 'align' to the list to keep, ## in your personal.ini [defaults] put: ## add_to_keep_html_attrs:,style,title,align -keep_html_attrs:href,name,class,id,colspan,rowspan +keep_html_attrs:href,name,class,id,colspan,rowspan,data-orighref ## Tags listed here will be replaced with <span class="tagname">. ## For example: <u>underlined text</u> becomes diff --git a/fanficfare/writers/writer_epub.py b/fanficfare/writers/writer_epub.py index 2046cd1c..1b740fb6 100644 --- a/fanficfare/writers/writer_epub.py +++ b/fanficfare/writers/writer_epub.py @@ -700,14 +700,27 @@ div { margin: 0pt; padding: 0pt; } CHAPTER_END = self.EPUB_CHAPTER_END for index, chap in enumerate(self.story.getChapters()): # (url,title,html) + logger.debug("chapter:%s %s %s"%(len(chap['html']), chap['title'],chap['url'])) if chap['html']: chap_data = chap['html'] if self.getConfig('internalize_text_links'): soup = bs4.BeautifulSoup(chap['html'],'html5lib') changed=False for alink in soup.find_all('a'): - if alink.has_attr('href') and alink['href'] in chapurlmap: + ## Chapters can be inserted in the middle + ## which can break existing internal links. + ## So let's save the original href and update. + logger.debug("found %s"%alink) + if alink.has_attr('data-orighref') and alink['data-orighref'] in chapurlmap: + alink['href']=chapurlmap[alink['data-orighref']] + logger.debug("set1 %s"%alink) + changed=True + elif alink.has_attr('href') and alink['href'] in chapurlmap: + if not alink['href'].startswith('file'): + # only save orig href if not already internal. + alink['data-orighref']=alink['href'] alink['href']=chapurlmap[alink['href']] + logger.debug("set2 %s"%alink) changed=True if changed: chap_data = unicode(soup) @@ -734,6 +747,7 @@ div { margin: 0pt; padding: 0pt; } # (200k+) fullhtml = re.sub(r'(</p>|<br ?/>)\n*',r'\1\n',fullhtml) + logger.debug("write OEBPS/file%s.xhtml"%chap['index04']) outputepub.writestr("OEBPS/file%s.xhtml"%chap['index04'],fullhtml.encode('utf-8')) del fullhtml diff --git a/fanficfare/writers/writer_html.py b/fanficfare/writers/writer_html.py index 04615394..54de45bd 100644 --- a/fanficfare/writers/writer_html.py +++ b/fanficfare/writers/writer_html.py @@ -146,6 +146,8 @@ ${output_css} if chap['html']: chap_data = chap['html'] if self.getConfig('internalize_text_links'): + # html doesn't need data-orighref because it + # doesn't do updates. soup = bs4.BeautifulSoup(chap['html'],'html5lib') changed=False for alink in soup.find_all('a'):