From c69eb1e33ee47cc5ae027aab34dde3213f77a172 Mon Sep 17 00:00:00 2001
From: David Lynch <kemayo@gmail.com>
Date: Mon, 30 Nov 2015 20:10:58 -0600
Subject: [PATCH] Footnotes off in their own file

---
 epub.py           | 10 ++++++----
 leech.py          |  3 +++
 sites/__init__.py | 38 ++++++++++++++++++++++++++++++++++++++
 sites/xenforo.py  | 37 ++++++++++---------------------------
 4 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/epub.py b/epub.py
index a86d231..d442cd1 100644
--- a/epub.py
+++ b/epub.py
@@ -13,23 +13,25 @@ a bit of metadata thrown in for good measure.
 This totally started from http://www.manuel-strehl.de/dev/simple_epub_ebooks_with_python.en.html
 """
 
+
 def sanitize_filename(s):
     """Take a string and return a valid filename constructed from the string.
     Uses a whitelist approach: any characters not present in valid_chars are
     removed. Also spaces are replaced with underscores.
-     
+
     Note: this method may produce invalid filenames such as ``, `.` or `..`
     When I use this method I prepend a date string like '2009_01_15_19_46_32_'
     and append a file extension like '.txt', so I avoid the potential of using
     an invalid filename.
-     
+
     """
     valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
     filename = ''.join(c for c in s if c in valid_chars)
-    filename = filename.replace(' ','_') # I don't like spaces in filenames.
+    filename = filename.replace(' ', '_')  # I don't like spaces in filenames.
     return filename
 
-def make_epub(filename, html_files, meta, extra_files = False):
+
+def make_epub(filename, html_files, meta, extra_files=False):
     unique_id = meta.get('unique_id', False)
     if not unique_id:
         unique_id = 'leech_book_' + str(uuid.uuid4())
diff --git a/leech.py b/leech.py
index a184b8c..35dc617 100755
--- a/leech.py
+++ b/leech.py
@@ -74,6 +74,11 @@ def leech(url, filename=None, cache=True):
     for i, chapter in enumerate(story['chapters']):
         html.append((chapter[0], 'chapter%d.html' % (i + 1), html_template.format(title=chapter[0], text=chapter[1])))
 
+    # Footnote links inside chapters point at footnotes.html, so this filename
+    # must stay in sync with the href built in sites.Site._footnote.
+    if story.get('footnotes'):
+        html.append(("Footnotes", 'footnotes.html', html_template.format(title="Footnotes", text=story['footnotes'])))
+
     css = ('Styles/base.css', fetch('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css'), 'text/css')
     cover_image = ('images/cover.png', cover.make_cover(story['title'], story['author']).read(), 'image/png')
 
diff --git a/sites/__init__.py b/sites/__init__.py
index 8a1cbaf..0efb3ef 100644
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -3,6 +3,7 @@ from bs4 import BeautifulSoup
 
 _sites = []
 
+
 class Site:
     """A Site handles checking whether a URL might represent a site, and then
     extracting the content of a story from said site.
@@ -11,6 +12,8 @@ class Site:
         super().__init__()
         self.fetch = fetch
         self.cache = cache
+        # prettified HTML of each footnote registered through _footnote()
+        self.footnotes = []
 
     @staticmethod
     def matches(url):
@@ -32,13 +34,62 @@ class Site:
         soup = BeautifulSoup("", 'html5lib')
         return soup.new_tag(*args, **kw)
 
+    def _footnote(self, contents, backlink_href=''):
+        """Register a footnote and return a link to that footnote.
+
+        `contents` is the tag holding the footnote body. It is modified in
+        place (renamed to a div, given an id and epub:type, and prefixed with
+        a backlink) and its prettified HTML is appended to self.footnotes.
+
+        `backlink_href` is the file that will contain the returned link; it
+        is prepended to the backlink's #notebackN fragment. The default ''
+        yields a same-document backlink.
+
+        Returns a new <a> tag whose text is the footnote number. Callers may
+        replace the text but should keep the id and href.
+        """
+
+        # footnotes are numbered from 1, in registration order
+        idx = len(self.footnotes) + 1
+
+        # epub spec footnotes are all about epub:type on the footnote and the link
+        # http://www.idpf.org/accessibility/guidelines/content/semantics/epub-type.php
+        contents.name = 'div'
+        contents.attrs['id'] = "footnote%d" % idx
+        contents.attrs['epub:type'] = 'rearnote'
+
+        # a backlink is essential for Kindle to think of this as a footnote
+        # otherwise it doesn't get the inline-popup treatment
+        # http://kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.pdf
+        # section 3.9.10
+        backlink = self._new_tag('a', href="%s#noteback%d" % (backlink_href, idx))
+        backlink.string = '^'
+        contents.insert(0, backlink)
+
+        self.footnotes.append(contents.prettify())
+
+        # now build the link to the footnote to return, with appropriate
+        # epub annotations.
+        spoiler_link = self._new_tag('a')
+        spoiler_link.attrs = {
+            'id': 'noteback%d' % idx,
+            'href': "footnotes.html#footnote%d" % idx,
+            'epub:type': 'noteref',
+        }
+        spoiler_link.string = str(idx)
+
+        return spoiler_link
+
+
 class SiteException(Exception):
     pass
 
+
 def register(site_class):
     _sites.append(site_class)
     return site_class
 
+
 def get(url):
     for site_class in _sites:
         if site_class.matches(url):
diff --git a/sites/xenforo.py b/sites/xenforo.py
index a7b496c..5846581 100644
--- a/sites/xenforo.py
+++ b/sites/xenforo.py
@@ -35,16 +35,21 @@ class XenForo(Site):
         marks = self._chapter_list(url)
 
         chapters = []
         for mark in marks:
             href = mark.get('href')
             if '/members' in href:
                 continue
             if not href.startswith('http'):
                 href = base + href
             print("Fetching chapter", mark.string, href)
-            chapters.append((str(mark.string),) + self._chapter(href))
+            # Number chapters by their position in `chapters`, not in `marks`:
+            # skipped marks must not leave gaps, because the epub names files
+            # chapterN.html by list position and footnote backlinks use N.
+            chapters.append((str(mark.string),) + self._chapter(href, len(chapters) + 1))
 
         story['chapters'] = chapters
+        story['footnotes'] = '\n\n'.join(self.footnotes)
+        self.footnotes = []
 
         return story
 
@@ -82,10 +84,15 @@ class XenForo(Site):
 
         return links
 
-    def _chapter(self, url):
+    def _chapter(self, url, chapter_number):
+        """Return (cleaned chapter HTML, result of _post_date) for `url`.
+
+        `chapter_number` is handed to _clean_chapter, which uses it to build
+        the chapterN.html href for footnote backlinks.
+        """
         post = self._post_from_url(url)
 
-        return self._clean_chapter(post), self._post_date(post)
+        return self._clean_chapter(post, chapter_number), self._post_date(post)
 
     def _post_from_url(self, url):
         # URLs refer to specific posts, so get just that one
@@ -103,37 +105,25 @@ class XenForo(Site):
         # just the first one in the thread, then
         return soup.find('li', class_='message')
 
-    def _clean_chapter(self, post):
+    def _clean_chapter(self, post, chapter_number):
+        """Return the post's message body as prettified HTML.
+
+        Inline styles are removed and every spoiler is replaced by a link to
+        a footnote registered through self._footnote. `chapter_number` names
+        the chapterN.html file that the footnote backlinks point at.
+        """
         post = post.find('blockquote', class_='messageText')
         post.name = 'div'
         # mostly, we want to remove colors because the Kindle is terrible at them
         for tag in post.find_all(style=True):
             del(tag['style'])
         # spoilers don't work well, so turn them into epub footnotes
-        spoiler_holder = False
-        for idx, spoiler in enumerate(post.find_all(class_='ToggleTriggerAnchor')):
-            if not spoiler_holder:
-                spoiler_holder = self._new_tag('section')
-                post.append(spoiler_holder)
-            contents = spoiler.find(class_='SpoilerTarget')
-            contents.name = 'aside'
-            contents.attrs['id'] = "spoiler%d" % idx
-            contents.attrs['epub:type'] = 'footnote'
-            backlink = self._new_tag('a', href="#spoiler%dx" % idx)
-            backlink.string = '^'
-            contents.insert(0, backlink)
-            spoiler_holder.append(contents)
-
+        for spoiler in post.find_all(class_='ToggleTriggerAnchor'):
+            link = self._footnote(spoiler.find(class_='SpoilerTarget').extract(), 'chapter%d.html' % chapter_number)
+            # show the spoiler's title as the link text instead of the footnote number
+            link.string = spoiler.find(class_='SpoilerTitle').get_text()
             new_spoiler = self._new_tag('div')
-            spoiler_link = self._new_tag('a')
-            spoiler_link.attrs = {
-                'id': 'spoiler%dx' % idx,
-                'href': "#spoiler%d" % idx,
-                'epub:type': 'noteref',
-            }
-            spoiler_link.string = spoiler.find(class_='SpoilerTitle').get_text()
-            new_spoiler.append(spoiler_link)
-
+            new_spoiler.append(link)
             spoiler.replace_with(new_spoiler)
         return post.prettify()