diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini
index 668bc055..66891775 100644
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@@ -337,11 +337,13 @@ keep_summary_html:true
## Some attributes cause problems for EBook readers. By default,
## FanFicFare will remove all attributes except the ones specified
## from all tags. (The only exception is that <img> tags will also
-## keep src, alt and longdesc attributes.)
+## keep src, alt and longdesc attributes. data-orighref is used by
+## internalize_text_links to preserve links when chapters are
+## inserted.)
## Example: To add 'style', 'title' and 'align' to the list to keep,
## in your personal.ini [defaults] put:
## add_to_keep_html_attrs:,style,title,align
-keep_html_attrs:href,name,class,id,colspan,rowspan
+keep_html_attrs:href,name,class,id,colspan,rowspan,data-orighref
## Tags listed here will be replaced with <span class='tagname'>.
## For example: <u>underlined text</u> becomes
diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py
index 73969478..3310ebff 100644
--- a/fanficfare/adapters/base_adapter.py
+++ b/fanficfare/adapters/base_adapter.py
@@ -451,7 +451,7 @@ class BaseSiteAdapter(Configurable):
if not fetch:
fetch=self._fetchUrlRaw
- acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id'])
+ acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id','data-orighref'])
if self.getConfig("keep_style_attr"):
acceptable_attributes.append('style')
diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini
index 5dffb2c9..47364b9b 100644
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@@ -340,11 +340,13 @@ keep_summary_html:true
## Some attributes cause problems for EBook readers. By default,
## FanFicFare will remove all attributes except the ones specified
## from all tags. (The only exception is that <img> tags will also
-## keep src, alt and longdesc attributes.)
+## keep src, alt and longdesc attributes. data-orighref is used by
+## internalize_text_links to preserve links when chapters are
+## inserted.)
## Example: To add 'style', 'title' and 'align' to the list to keep,
## in your personal.ini [defaults] put:
## add_to_keep_html_attrs:,style,title,align
-keep_html_attrs:href,name,class,id,colspan,rowspan
+keep_html_attrs:href,name,class,id,colspan,rowspan,data-orighref
## Tags listed here will be replaced with <span class='tagname'>.
## For example: <u>underlined text</u> becomes
diff --git a/fanficfare/writers/writer_epub.py b/fanficfare/writers/writer_epub.py
index 2046cd1c..1b740fb6 100644
--- a/fanficfare/writers/writer_epub.py
+++ b/fanficfare/writers/writer_epub.py
@@ -700,14 +700,27 @@ div { margin: 0pt; padding: 0pt; }
CHAPTER_END = self.EPUB_CHAPTER_END
for index, chap in enumerate(self.story.getChapters()): # (url,title,html)
+ logger.debug("chapter:%s %s %s"%(len(chap['html']), chap['title'],chap['url']))
if chap['html']:
chap_data = chap['html']
if self.getConfig('internalize_text_links'):
soup = bs4.BeautifulSoup(chap['html'],'html5lib')
changed=False
for alink in soup.find_all('a'):
- if alink.has_attr('href') and alink['href'] in chapurlmap:
+ ## Chapters can be inserted in the middle
+ ## which can break existing internal links.
+ ## So let's save the original href and update.
+ logger.debug("found %s"%alink)
+ if alink.has_attr('data-orighref') and alink['data-orighref'] in chapurlmap:
+ alink['href']=chapurlmap[alink['data-orighref']]
+ logger.debug("set1 %s"%alink)
+ changed=True
+ elif alink.has_attr('href') and alink['href'] in chapurlmap:
+ if not alink['href'].startswith('file'):
+ # only save orig href if not already internal.
+ alink['data-orighref']=alink['href']
alink['href']=chapurlmap[alink['href']]
+ logger.debug("set2 %s"%alink)
changed=True
if changed:
chap_data = unicode(soup)
@@ -734,6 +747,7 @@ div { margin: 0pt; padding: 0pt; }
# (200k+)
fullhtml = re.sub(r'(