From 5bd07a5b90c2de6743c0da95d9a12e1b942474f6 Mon Sep 17 00:00:00 2001
From: Will Oursler <woursler@google.com>
Date: Sat, 7 Oct 2017 13:01:44 -0400
Subject: [PATCH 1/2] Splits out ebook generation logic into a separate module,
 in anticipation of maybe supporting multiple output formats.

---
 ebook/__init__.py          | 106 +++++++++++++++++++++++++++++++++++++
 cover.py => ebook/cover.py |   2 +-
 epub.py => ebook/epub.py   |   0
 leech.py                   | 102 +----------------------------------
 sites/__init__.py          |   2 +
 sites/ao3.py               |   3 +-
 sites/arbitrary.py         |   3 +-
 sites/deviantart.py        |   3 +-
 sites/fanfictionnet.py     |   3 +-
 sites/stash.py             |   3 +-
 sites/xenforo.py           |   3 +-
 11 files changed, 123 insertions(+), 107 deletions(-)
 create mode 100644 ebook/__init__.py
 rename cover.py => ebook/cover.py (91%)
 rename epub.py => ebook/epub.py (100%)
diff --git a/ebook/__init__.py b/ebook/__init__.py
new file mode 100644
index 0000000..2091ca2
--- /dev/null
+++ b/ebook/__init__.py
@@ -0,0 +1,106 @@
+from .epub import *
+from .cover import *
+
+import os
+import datetime
+import requests
+
+html_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
+<head>
+    <title>{title}</title>
+    <link rel="stylesheet" type="text/css" href="../Styles/base.css" />
+</head>
+<body>
+<h1>{title}</h1>
+{text}
+</body>
+</html>
+'''
+
+cover_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+    <title>Cover</title>
+    <link rel="stylesheet" type="text/css" href="Styles/base.css" />
+</head>
+<body>
+<div class="cover">
+<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
+    width="100%" height="100%" viewBox="0 0 573 800" preserveAspectRatio="xMidYMid meet">
+<image width="600" height="800" xlink:href="images/cover.png" />
+</svg>
+</div>
+</body>
+</html>
+'''
+
+frontmatter_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+    <title>Front Matter</title>
+    <link rel="stylesheet" type="text/css" href="Styles/base.css" />
+</head>
+<body>
+<div class="cover title">
+    <h1>{title}<br />By {author}</h1>
+    <dl>
+        <dt>Source</dt>
+        <dd>{unique_id}</dd>
+        <dt>Started</dt>
+        <dd>{started:%Y-%m-%d}</dd>
+        <dt>Updated</dt>
+        <dd>{updated:%Y-%m-%d}</dd>
+        <dt>Downloaded on</dt>
+        <dd>{now:%Y-%m-%d}</dd>
+    </dl>
+</div>
+</body>
+</html>
+'''
+
+
+def chapter_html(story, titleprefix=None):
+    chapters = []
+    for i, chapter in enumerate(story):
+        if hasattr(chapter, '__iter__'):
+            # This is a Section
+            chapters.extend(chapter_html(chapter, titleprefix=chapter.title))
+        else:
+            title = titleprefix and '{}: {}'.format(titleprefix, chapter.title) or chapter.title
+            chapters.append((
+                title,
+                '{}/chapter{}.html'.format(story.id, i + 1),
+                html_template.format(title=title, text=chapter.contents)
+            ))
+    if story.footnotes:
+        chapters.append(("Footnotes", '{}/footnotes.html'.format(story.id), html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
+    return chapters
+
+
+def generate_epub(story, output_filename = None):
+  dates = list(story.dates())
+  metadata = {
+    'title': story.title,
+    'author': story.author,
+    'unique_id': story.url,
+    'started': min(dates),
+    'updated': max(dates),
+  }
+
+  # The cover is static, and the only change comes from the image which we generate
+  html = [('Cover', 'cover.html', cover_template)]
+
+  cover_image = ('images/cover.png', cover.make_cover(story.title, story.author).read(), 'image/png')
+
+  html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
+
+  html.extend(chapter_html(story))
+
+  css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')
+
+  output_filename = output_filename or story.title + '.epub'
+
+  output_filename = epub.make_epub(output_filename, html, metadata, extra_files=(css, cover_image))
+
+  return output_filename
diff --git a/cover.py b/ebook/cover.py
similarity index 91%
rename from cover.py
rename to ebook/cover.py
index 1107101..1d21668 100644
--- a/cover.py
+++ b/ebook/cover.py
@@ -4,7 +4,7 @@ from io import BytesIO
 import textwrap
 
 
-def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30):
+def make_cover(title, author, width=600, height=800, fontname="FreeSans", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30):
     img = Image.new("RGBA", (width, height), bgcolor)
     draw = ImageDraw.Draw(img)
 
diff --git a/epub.py b/ebook/epub.py
similarity index 100%
rename from epub.py
rename to ebook/epub.py
diff --git a/leech.py b/leech.py
index 3ea9a8f..763c292 100755
--- a/leech.py
+++ b/leech.py
@@ -3,12 +3,10 @@
 import argparse
 import sys
 import json
-import datetime
 import http.cookiejar
 
 import sites
-import epub
-import cover
+import ebook
 
 import requests
 import requests_cache
@@ -16,60 +14,6 @@ import requests_cache
 __version__ = 1
 USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__
 
-html_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
-<head>
-    <title>{title}</title>
-    <link rel="stylesheet" type="text/css" href="../Styles/base.css" />
-</head>
-<body>
-<h1>{title}</h1>
-{text}
-</body>
-</html>
-'''
-
-cover_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-    <title>Cover</title>
-    <link rel="stylesheet" type="text/css" href="Styles/base.css" />
-</head>
-<body>
-<div class="cover">
-<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
-    width="100%" height="100%" viewBox="0 0 573 800" preserveAspectRatio="xMidYMid meet">
-<image width="600" height="800" xlink:href="images/cover.png" />
-</svg>
-</div>
-</body>
-</html>
-'''
-
-frontmatter_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-    <title>Front Matter</title>
-    <link rel="stylesheet" type="text/css" href="Styles/base.css" />
-</head>
-<body>
-<div class="cover title">
-    <h1>{title}<br />By {author}</h1>
-    <dl>
-        <dt>Source</dt>
-        <dd>{unique_id}</dd>
-        <dt>Started</dt>
-        <dd>{started:%Y-%m-%d}</dd>
-        <dt>Updated</dt>
-        <dd>{updated:%Y-%m-%d}</dd>
-        <dt>Downloaded on</dt>
-        <dd>{now:%Y-%m-%d}</dd>
-    </dl>
-</div>
-</body>
-</html>
-'''
-
 
 def leech(url, session, filename=None, args=None):
     # we have: a page, which could be absolutely any part of a story, or not a story at all
@@ -92,49 +36,7 @@ def leech(url, session, filename=None, args=None):
     if not story:
         raise Exception("Couldn't extract story")
 
-    dates = list(story.dates())
-    metadata = {
-        'title': story.title,
-        'author': story.author,
-        'unique_id': url,
-        'started': min(dates),
-        'updated': max(dates),
-    }
-
-    # The cover is static, and the only change comes from the image which we generate
-    html = [('Cover', 'cover.html', cover_template)]
-    cover_image = ('images/cover.png', cover.make_cover(story.title, story.author).read(), 'image/png')
-
-    html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
-
-    html.extend(chapter_html(story))
-
-    css = ('Styles/base.css', session.get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')
-
-    filename = filename or story.title + '.epub'
-
-    # print([c[0:-1] for c in html])
-    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))
-
-    return filename
-
-
-def chapter_html(story, titleprefix=None):
-    chapters = []
-    for i, chapter in enumerate(story):
-        if hasattr(chapter, '__iter__'):
-            # This is a Section
-            chapters.extend(chapter_html(chapter, titleprefix=chapter.title))
-        else:
-            title = titleprefix and '{}: {}'.format(titleprefix, chapter.title) or chapter.title
-            chapters.append((
-                title,
-                '{}/chapter{}.html'.format(story.id, i + 1),
-                html_template.format(title=title, text=chapter.contents)
-            ))
-    if story.footnotes:
-        chapters.append(("Footnotes", '{}/footnotes.html'.format(story.id), html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
-    return chapters
+    return ebook.generate_epub(story, filename)
 
 
 if __name__ == '__main__':
diff --git a/sites/__init__.py b/sites/__init__.py
index 5aeed41..70ab656 100644
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -25,6 +25,7 @@ class Chapter:
 class Section:
     title = attr.ib()
     author = attr.ib()
+    url = attr.ib()
     id = attr.ib(default=attr.Factory(_default_uuid_string), convert=str)
     contents = attr.ib(default=attr.Factory(list))
     footnotes = attr.ib(default=attr.Factory(list))
@@ -155,6 +156,7 @@ def get(url):
         match = site_class.matches(url)
         if match:
             return site_class, match
+    raise NotImplementedError("Could not find a handler for " + url)
 
 
 # And now, a particularly hacky take on a plugin system:
diff --git a/sites/ao3.py b/sites/ao3.py
index b4062ec..9fd2e24 100644
--- a/sites/ao3.py
+++ b/sites/ao3.py
@@ -25,7 +25,8 @@ class ArchiveOfOurOwn(Site):
         metadata = soup.select('#main h2.heading a')
         story = Section(
             title=metadata[0].string,
-            author=metadata[1].string
+            author=metadata[1].string,
+            url=url
         )
 
         for chapter in soup.select('#main ol[role="navigation"] li'):
diff --git a/sites/arbitrary.py b/sites/arbitrary.py
index ee06e4f..1463f14 100644
--- a/sites/arbitrary.py
+++ b/sites/arbitrary.py
@@ -58,7 +58,8 @@ class Arbitrary(Site):
                     title=chapter.string,
                     contents=self._chapter(chapter_url, definition),
                     # TODO: better date detection
-                    date=datetime.datetime.now()
+                    date=datetime.datetime.now(),
+                    url=url
                 ))
         else:
             story.add(Chapter(
diff --git a/sites/deviantart.py b/sites/deviantart.py
index 014b030..bb2775a 100644
--- a/sites/deviantart.py
+++ b/sites/deviantart.py
@@ -29,7 +29,8 @@ class DeviantArt(Stash):
 
         story = Section(
             title=str(content.find(class_="folder-title").string),
-            author=author
+            author=author,
+            url=url
         )
 
         thumbs = content.select(".stream a.thumb")
diff --git a/sites/fanfictionnet.py b/sites/fanfictionnet.py
index 505d4be..c3a6792 100644
--- a/sites/fanfictionnet.py
+++ b/sites/fanfictionnet.py
@@ -25,7 +25,8 @@ class FanFictionNet(Site):
 
         story = Section(
             title=str(metadata.find('b', class_="xcontrast_txt").string),
-            author=str(metadata.find('a', class_="xcontrast_txt").string)
+            author=str(metadata.find('a', class_="xcontrast_txt").string),
+            url=url
         )
 
         dates = content.find_all('span', attrs={'data-xutime': True})
diff --git a/sites/stash.py b/sites/stash.py
index fc957ee..e7487b6 100644
--- a/sites/stash.py
+++ b/sites/stash.py
@@ -23,7 +23,8 @@ class Stash(Site):
         # metadata = content.find(id='profile_top')
         story = Section(
             title=str(soup.find(class_="stash-folder-name").h2.string),
-            author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s")
+            author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s"),
+            url=url
         )
 
         thumbs = content.select(".stash-folder-stream .thumb")
diff --git a/sites/xenforo.py b/sites/xenforo.py
index be896d2..cb0e8a1 100644
--- a/sites/xenforo.py
+++ b/sites/xenforo.py
@@ -32,7 +32,8 @@ class XenForo(Site):
 
         story = Section(
             title=soup.select('div.titleBar > h1')[0].get_text(),
-            author=soup.find('p', id='pageDescription').find('a', class_='username').get_text()
+            author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(),
+            url=url
         )
 
         marks = [

From 1c577b6f671b631eec428d654d461b3cac595808 Mon Sep 17 00:00:00 2001
From: Will Oursler <woursler@google.com>
Date: Thu, 12 Oct 2017 10:07:22 -0400
Subject: [PATCH 2/2] Fix lint errors

---
 ebook/__init__.py | 41 ++++++++++++++++++++---------------------
 sites/ao3.py      |  2 +-
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/ebook/__init__.py b/ebook/__init__.py
index 2091ca2..87a769d 100644
--- a/ebook/__init__.py
+++ b/ebook/__init__.py
@@ -1,7 +1,6 @@
-from .epub import *
-from .cover import *
+from .epub import make_epub
+from .cover import make_cover
 
-import os
 import datetime
 import requests
 
@@ -78,29 +77,29 @@ def chapter_html(story, titleprefix=None):
     return chapters
 
 
-def generate_epub(story, output_filename = None):
-  dates = list(story.dates())
-  metadata = {
-    'title': story.title,
-    'author': story.author,
-    'unique_id': story.url,
-    'started': min(dates),
-    'updated': max(dates),
-  }
+def generate_epub(story, output_filename=None):
+    dates = list(story.dates())
+    metadata = {
+        'title': story.title,
+        'author': story.author,
+        'unique_id': story.url,
+        'started': min(dates),
+        'updated': max(dates),
+    }
 
-  # The cover is static, and the only change comes from the image which we generate
-  html = [('Cover', 'cover.html', cover_template)]
+    # The cover is static, and the only change comes from the image which we generate
+    html = [('Cover', 'cover.html', cover_template)]
 
-  cover_image = ('images/cover.png', cover.make_cover(story.title, story.author).read(), 'image/png')
+    cover_image = ('images/cover.png', make_cover(story.title, story.author).read(), 'image/png')
 
-  html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
+    html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
 
-  html.extend(chapter_html(story))
+    html.extend(chapter_html(story))
 
-  css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')
+    css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')
 
-  output_filename = output_filename or story.title + '.epub'
+    output_filename = output_filename or story.title + '.epub'
 
-  output_filename = epub.make_epub(output_filename, html, metadata, extra_files=(css, cover_image))
+    output_filename = make_epub(output_filename, html, metadata, extra_files=(css, cover_image))
 
-  return output_filename
+    return output_filename
diff --git a/sites/ao3.py b/sites/ao3.py
index 9fd2e24..4523ae6 100644
--- a/sites/ao3.py
+++ b/sites/ao3.py
@@ -26,7 +26,7 @@ class ArchiveOfOurOwn(Site):
         story = Section(
             title=metadata[0].string,
             author=metadata[1].string,
-            url=url
+            url='http://archiveofourown.org/works/{}'.format(workid)
         )
 
         for chapter in soup.select('#main ol[role="navigation"] li'):