From 95e25dabd3160b4ee8b1ad6db57332fc49987980 Mon Sep 17 00:00:00 2001 From: David Lynch Date: Mon, 30 Nov 2015 16:46:29 -0600 Subject: [PATCH] First pass at turning spoilers into footnotes for Xenforo This works as popup-footnotes in iBooks and on Kindle. It'd be a bit better if I put the footnotes in their own file, so they won't be dropped at the end of chapters on a Kindle. However, that requires some flow restructuring, and this is an acceptable proof-of-concept for now. --- leech.py | 2 +- sites/__init__.py | 4 ++++ sites/xenforo.py | 28 +++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/leech.py b/leech.py index d5c9e27..a184b8c 100755 --- a/leech.py +++ b/leech.py @@ -13,7 +13,7 @@ from fetch import Fetch fetch = Fetch("leech") html_template = ''' - + {title} diff --git a/sites/__init__.py b/sites/__init__.py index 0f537a4..8a1cbaf 100644 --- a/sites/__init__.py +++ b/sites/__init__.py @@ -28,6 +28,10 @@ class Site: raise SiteException("Couldn't fetch", url) return BeautifulSoup(page, method) + def _new_tag(self, *args, **kw): + soup = BeautifulSoup("", 'html5lib') + return soup.new_tag(*args, **kw) + class SiteException(Exception): pass diff --git a/sites/xenforo.py b/sites/xenforo.py index 5f8dfd3..a7b496c 100644 --- a/sites/xenforo.py +++ b/sites/xenforo.py @@ -98,7 +98,7 @@ class XenForo(Site): soup = self._soup(url, 'html5lib') if postid: - return soup.find('li', id='post-'+postid) + return soup.find('li', id='post-' + postid) # just the first one in the thread, then return soup.find('li', class_='message') @@ -109,6 +109,32 @@ class XenForo(Site): # mostly, we want to remove colors because the Kindle is terrible at them for tag in post.find_all(style=True): del(tag['style']) + # spoilers don't work well, so turn them into epub footnotes + spoiler_holder = False + for idx, spoiler in enumerate(post.find_all(class_='ToggleTriggerAnchor')): + if not spoiler_holder: + spoiler_holder = self._new_tag('section') + post.append(spoiler_holder) + contents = spoiler.find(class_='SpoilerTarget') + contents.name = 'aside' + contents.attrs['id'] = "spoiler%d" % idx + contents.attrs['epub:type'] = 'footnote' + backlink = self._new_tag('a', href="#spoiler%dx" % idx) + backlink.string = '^' + contents.insert(0, backlink) + spoiler_holder.append(contents) + + new_spoiler = self._new_tag('div') + spoiler_link = self._new_tag('a') + spoiler_link.attrs = { + 'id': 'spoiler%dx' % idx, + 'href': "#spoiler%d" % idx, + 'epub:type': 'noteref', + } + spoiler_link.string = spoiler.find(class_='SpoilerTitle').get_text() + new_spoiler.append(spoiler_link) + + spoiler.replace_with(new_spoiler) return post.prettify() def _post_date(self, post):