From 24fa9aa22d14ea038564c55c6f035d9b59611e12 Mon Sep 17 00:00:00 2001
From: David Lynch <kemayo@gmail.com>
Date: Fri, 23 Sep 2016 13:11:52 -0500
Subject: [PATCH] Use a namedtuple for chapters

---
 leech.py               |  8 ++++----
 sites/__init__.py      |  6 +++++-
 sites/ao3.py           |  4 ++--
 sites/deviantart.py    |  3 ++-
 sites/fanfictionnet.py | 10 +++++-----
 sites/stash.py         |  4 ++--
 sites/xenforo.py       |  5 +++--
 7 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/leech.py b/leech.py
index b5763e2..895a900 100755
--- a/leech.py
+++ b/leech.py
@@ -90,7 +90,7 @@ def leech(url, session, filename=None, args=None):
     if not story:
         raise Exception("Couldn't extract story")
 
-    dates = [c[2] for c in story['chapters'] if c[2]]
+    dates = [c.date for c in story['chapters'] if c.date]
     metadata = {
         'title': story['title'],
         'author': story['author'],
@@ -105,11 +105,11 @@ def leech(url, session, filename=None, args=None):
 
     html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
 
-    for i, (chapter_title, chapter_html, chapter_date) in enumerate(story['chapters']):
+    for i, chapter in enumerate(story['chapters']):
         html.append((
-            chapter_title,
+            chapter.title,
             'chapter%d.html' % (i + 1),
-            html_template.format(title=chapter_title, text=chapter_html)
+            html_template.format(title=chapter.title, text=chapter.contents)
         ))
 
     if 'footnotes' in story and story['footnotes']:
diff --git a/sites/__init__.py b/sites/__init__.py
index 104cc19..71ee57e 100644
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -2,11 +2,15 @@
 import glob
 import os
 import argparse
+import collections
 from bs4 import BeautifulSoup
 
 _sites = []
 
 
+Chapter = collections.namedtuple('Chapter', ['title', 'contents', 'date'])  # title, chapter HTML, date (datetime; sites may pass a falsy value when they have none)
+
+
 class Site:
     """A Site handles checking whether a URL might represent a site, and then
     extracting the content of a story from said site.
@@ -30,7 +34,7 @@ class Site:
             story (dict) containing keys:
                 title (string)
                 author (string)
-                chapters (list): list of tuples, in form (title, HTML, datetime)
+                chapters (list): list of Chapter namedtuples, in form (title, contents, date)
         """
         raise NotImplementedError()
 
diff --git a/sites/ao3.py b/sites/ao3.py
index 93d1a2a..b66f801 100644
--- a/sites/ao3.py
+++ b/sites/ao3.py
@@ -2,7 +2,7 @@
 
 import datetime
 import re
-from . import register, Site, SiteException
+from . import register, Site, SiteException, Chapter
 
 
 @register
@@ -37,7 +37,7 @@ class ArchiveOfOurOwn(Site):
                 "(%Y-%m-%d)"
             )
 
-            chapters.append((link.string, self._chapter(chapter_url), updated))
+            chapters.append(Chapter(title=link.string, contents=self._chapter(chapter_url), date=updated))
 
         if not chapters:
             raise SiteException("No content")
diff --git a/sites/deviantart.py b/sites/deviantart.py
index 0303026..1b69b7a 100644
--- a/sites/deviantart.py
+++ b/sites/deviantart.py
@@ -2,9 +2,10 @@
 
 import re
 
-from . import register, Site, SiteException
+from . import register
 from .stash import Stash
 
+
 @register
 class DeviantArt(Stash):
     @staticmethod
diff --git a/sites/fanfictionnet.py b/sites/fanfictionnet.py
index 7cbedc7..83552d4 100644
--- a/sites/fanfictionnet.py
+++ b/sites/fanfictionnet.py
@@ -2,7 +2,7 @@
 
 import datetime
 import re
-from . import register, Site, SiteException
+from . import register, Site, SiteException, Chapter
 
 
 @register
@@ -45,11 +45,11 @@ class FanFictionNet(Site):
             # beautiful soup doesn't handle ffn's unclosed option tags at all well here
             options = re.findall(r'<option.+?value="?(\d+)"?[^>]*>([^<]+)', str(chapter_select))
             for option in options:
-                chapters.append((option[1], self._chapter(base_url + option[0]), False))
-            chapters[-1] = (chapters[-1][0], chapters[-1][1], updated)
-            chapters[0] = (chapters[0][0], chapters[0][1], published)
+                chapters.append(Chapter(title=option[1], contents=self._chapter(base_url + option[0]), date=False))
+            chapters[-1] = chapters[-1]._replace(date=updated)  # last chapter carries the updated date
+            chapters[0] = chapters[0]._replace(date=published)  # first chapter carries the published date (applied last, so it wins for a one-chapter list)
         else:
-            chapters.append((story['title'], self._extract_chapter(url), published))
+            chapters.append(Chapter(title=story['title'], contents=self._extract_chapter(url), date=published))
 
         story['chapters'] = chapters
 
diff --git a/sites/stash.py b/sites/stash.py
index f4df8b1..7204fac 100644
--- a/sites/stash.py
+++ b/sites/stash.py
@@ -2,7 +2,7 @@
 
 import datetime
 import re
-from . import register, Site, SiteException
+from . import register, Site, SiteException, Chapter
 
 
 @register
@@ -59,7 +59,7 @@ class Stash(Site):
         except Exception as e:
             raise SiteException("Trouble cleaning attributes", e)
 
-        return (title, text.prettify(), self._date(soup))
+        return Chapter(title=title, contents=text.prettify(), date=self._date(soup))
 
     def _date(self, soup):
         maybe_date = soup.find('div', class_="dev-metainfo-details").find('span', ts=True)
diff --git a/sites/xenforo.py b/sites/xenforo.py
index aa965f9..6f323d2 100644
--- a/sites/xenforo.py
+++ b/sites/xenforo.py
@@ -2,7 +2,7 @@
 
 import datetime
 import re
-from . import register, Site, SiteException
+from . import register, Site, SiteException, Chapter
 
 
 class XenForo(Site):
@@ -41,7 +41,8 @@ class XenForo(Site):
             if not href.startswith('http'):
                 href = base + href
             print("Fetching chapter", mark.string, href)
-            chapters.append((str(mark.string),) + self._chapter(href, idx))
+            contents, post_date = self._chapter(href, idx)
+            chapters.append(Chapter(title=str(mark.string), contents=contents, date=post_date))
 
         story['chapters'] = chapters
         story['footnotes'] = '\n\n'.join(self.footnotes)