From 24fa9aa22d14ea038564c55c6f035d9b59611e12 Mon Sep 17 00:00:00 2001 From: David Lynch Date: Fri, 23 Sep 2016 13:11:52 -0500 Subject: [PATCH] Use a namedtuple for chapters --- leech.py | 8 ++++---- sites/__init__.py | 6 +++++- sites/ao3.py | 4 ++-- sites/deviantart.py | 3 ++- sites/fanfictionnet.py | 10 +++++----- sites/stash.py | 4 ++-- sites/xenforo.py | 5 +++-- 7 files changed, 23 insertions(+), 17 deletions(-) diff --git a/leech.py b/leech.py index b5763e2..895a900 100755 --- a/leech.py +++ b/leech.py @@ -90,7 +90,7 @@ def leech(url, session, filename=None, args=None): if not story: raise Exception("Couldn't extract story") - dates = [c[2] for c in story['chapters'] if c[2]] + dates = [c.date for c in story['chapters'] if c.date] metadata = { 'title': story['title'], 'author': story['author'], @@ -105,11 +105,11 @@ def leech(url, session, filename=None, args=None): html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata))) - for i, (chapter_title, chapter_html, chapter_date) in enumerate(story['chapters']): + for i, chapter in enumerate(story['chapters']): html.append(( - chapter_title, + chapter.title, 'chapter%d.html' % (i + 1), - html_template.format(title=chapter_title, text=chapter_html) + html_template.format(title=chapter.title, text=chapter.contents) )) if 'footnotes' in story and story['footnotes']: diff --git a/sites/__init__.py b/sites/__init__.py index 104cc19..71ee57e 100644 --- a/sites/__init__.py +++ b/sites/__init__.py @@ -2,11 +2,15 @@ import glob import os import argparse +import collections from bs4 import BeautifulSoup _sites = [] +Chapter = collections.namedtuple('Chapter', ['title', 'contents', 'date']) + + class Site: """A Site handles checking whether a URL might represent a site, and then extracting the content of a story from said site. @@ -30,7 +34,7 @@ class Site: story (dict) containing keys: title (string) author (string) - chapters (list): list of tuples, in form (title, HTML, datetime) + chapters (list): list of Chapters (namedtuple, defined above) """ raise NotImplementedError() diff --git a/sites/ao3.py b/sites/ao3.py index 93d1a2a..b66f801 100644 --- a/sites/ao3.py +++ b/sites/ao3.py @@ -2,7 +2,7 @@ import datetime import re -from . import register, Site, SiteException +from . import register, Site, SiteException, Chapter @register @@ -37,7 +37,7 @@ class ArchiveOfOurOwn(Site): "(%Y-%m-%d)" ) - chapters.append((link.string, self._chapter(chapter_url), updated)) + chapters.append(Chapter(title=link.string, contents=self._chapter(chapter_url), date=updated)) if not chapters: raise SiteException("No content") diff --git a/sites/deviantart.py b/sites/deviantart.py index 0303026..1b69b7a 100644 --- a/sites/deviantart.py +++ b/sites/deviantart.py @@ -2,9 +2,10 @@ import re -from . import register, Site, SiteException +from . import register from .stash import Stash + @register class DeviantArt(Stash): @staticmethod diff --git a/sites/fanfictionnet.py b/sites/fanfictionnet.py index 7cbedc7..83552d4 100644 --- a/sites/fanfictionnet.py +++ b/sites/fanfictionnet.py @@ -2,7 +2,7 @@ import datetime import re -from . import register, Site, SiteException +from . import register, Site, SiteException, Chapter @register @@ -45,11 +45,11 @@ class FanFictionNet(Site): # beautiful soup doesn't handle ffn's unclosed option tags at all well here options = re.findall(r']*>([^<]+)', str(chapter_select)) for option in options: - chapters.append((option[1], self._chapter(base_url + option[0]), False)) - chapters[-1] = (chapters[-1][0], chapters[-1][1], updated) - chapters[0] = (chapters[0][0], chapters[0][1], published) + chapters.append(Chapter(title=option[1], contents=self._chapter(base_url + option[0]), date=False)) + chapters[-1] = Chapter(title=chapters[-1].title, contents=chapters[-1].contents, date=updated) + chapters[0] = Chapter(title=chapters[0].title, contents=chapters[0].contents, date=published) else: - chapters.append((story['title'], self._extract_chapter(url), published)) + chapters.append(Chapter(title=story['title'], contents=self._extract_chapter(url), date=published)) story['chapters'] = chapters diff --git a/sites/stash.py b/sites/stash.py index f4df8b1..7204fac 100644 --- a/sites/stash.py +++ b/sites/stash.py @@ -2,7 +2,7 @@ import datetime import re -from . import register, Site, SiteException +from . import register, Site, SiteException, Chapter @register @@ -59,7 +59,7 @@ class Stash(Site): except Exception as e: raise SiteException("Trouble cleaning attributes", e) - return (title, text.prettify(), self._date(soup)) + return Chapter(title=title, contents=text.prettify(), date=self._date(soup)) def _date(self, soup): maybe_date = soup.find('div', class_="dev-metainfo-details").find('span', ts=True) diff --git a/sites/xenforo.py b/sites/xenforo.py index aa965f9..6f323d2 100644 --- a/sites/xenforo.py +++ b/sites/xenforo.py @@ -2,7 +2,7 @@ import datetime import re -from . import register, Site, SiteException +from . import register, Site, SiteException, Chapter class XenForo(Site): @@ -41,7 +41,8 @@ class XenForo(Site): if not href.startswith('http'): href = base + href print("Fetching chapter", mark.string, href) - chapters.append((str(mark.string),) + self._chapter(href, idx)) + contents, post_date = self._chapter(href, idx) + chapters.append(Chapter(title=str(mark.string), contents=contents, date=post_date)) story['chapters'] = chapters story['footnotes'] = '\n\n'.join(self.footnotes)