diff --git a/ebook/__init__.py b/ebook/__init__.py
new file mode 100644
index 0000000..87a769d
--- /dev/null
+++ b/ebook/__init__.py
@@ -0,0 +1,105 @@
+from .epub import make_epub
+from .cover import make_cover
+
+import datetime
+import requests
+
+# Page template for a single chapter (and for the footnotes page).
+# chapter_html() fills it in via str.format() with the `title` and `text` fields.
+html_template = '''
+
+
+ {title}
+
+
+
+{title}
+{text}
+
+
+'''
+
+# Static cover page. generate_epub() adds it to the book as-is; it is never
+# passed through str.format().
+cover_template = '''
+
+
+ Cover
+
+
+
+
+
+
+
+
+'''
+
+# Front-matter page. generate_epub() fills it in via str.format() with the
+# `title`, `author` and `unique_id` fields plus the `started`, `updated` and
+# `now` values, each of which is rendered with the %Y-%m-%d format spec.
+frontmatter_template = '''
+
+
+ Front Matter
+
+
+
+
+
{title}
By {author}
+
+ - Source
+ - {unique_id}
+ - Started
+ - {started:%Y-%m-%d}
+ - Updated
+ - {updated:%Y-%m-%d}
+ - Downloaded on
+ - {now:%Y-%m-%d}
+
+
+
+
+'''
+
+
+def chapter_html(story, titleprefix=None):
+    """Flatten ``story`` into a list of ``(title, filename, html)`` tuples.
+
+    Entries of ``story`` that expose ``__iter__`` are treated as nested
+    sections and expanded recursively, with the section's own title used as
+    the prefix for the titles of its chapters. Every other entry is rendered
+    as a chapter through ``html_template``. When ``story.footnotes`` is
+    truthy, a final "Footnotes" page is appended.
+    """
+    pages = []
+    for number, entry in enumerate(story, start=1):
+        if hasattr(entry, '__iter__'):
+            # This is a Section
+            pages.extend(chapter_html(entry, titleprefix=entry.title))
+            continue
+        if titleprefix:
+            heading = '{}: {}'.format(titleprefix, entry.title)
+        else:
+            heading = entry.title
+        filename = '{}/chapter{}.html'.format(story.id, number)
+        body = html_template.format(title=heading, text=entry.contents)
+        pages.append((heading, filename, body))
+    if story.footnotes:
+        notes_filename = '{}/footnotes.html'.format(story.id)
+        notes_text = '\n\n'.join(story.footnotes)
+        pages.append((
+            "Footnotes",
+            notes_filename,
+            html_template.format(title="Footnotes", text=notes_text),
+        ))
+    return pages
+
+
+def _fetch_base_css(session):
+    """Download the text of the base stylesheet using ``session``."""
+    response = session.get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css')
+    # Fail loudly rather than embedding an HTTP error page as the book's CSS.
+    response.raise_for_status()
+    return response.text
+
+
+def generate_epub(story, output_filename=None, session=None):
+    """Build an EPUB for ``story`` and return the filename given by ``make_epub``.
+
+    The book is assembled from a generated cover image, the static cover
+    page, a front-matter page, every page returned by ``chapter_html(story)``
+    and a base stylesheet downloaded over HTTP.
+
+    ``output_filename`` defaults to ``story.title + '.epub'``.
+
+    ``session`` is an optional requests-style session used to download the
+    stylesheet, so a caller can pass one it has already configured. When it
+    is omitted, a temporary ``requests.Session`` is created and closed again
+    once the download is done.
+
+    Raises ``requests.HTTPError`` if the stylesheet request returns an error
+    status.
+    """
+    dates = list(story.dates())
+    metadata = {
+        'title': story.title,
+        'author': story.author,
+        'unique_id': story.url,
+        'started': min(dates),
+        'updated': max(dates),
+    }
+
+    # The cover is static, and the only change comes from the image which we generate
+    html = [('Cover', 'cover.html', cover_template)]
+
+    cover_image = ('images/cover.png', make_cover(story.title, story.author).read(), 'image/png')
+
+    html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
+
+    html.extend(chapter_html(story))
+
+    if session is None:
+        # No session supplied: use a throwaway one and release its
+        # connection pool afterwards instead of leaking it.
+        with requests.Session() as temporary_session:
+            stylesheet = _fetch_base_css(temporary_session)
+    else:
+        stylesheet = _fetch_base_css(session)
+    css = ('Styles/base.css', stylesheet, 'text/css')
+
+    output_filename = output_filename or story.title + '.epub'
+
+    output_filename = make_epub(output_filename, html, metadata, extra_files=(css, cover_image))
+
+    return output_filename
diff --git a/cover.py b/ebook/cover.py
similarity index 91%
rename from cover.py
rename to ebook/cover.py
index 1107101..1d21668 100644
--- a/cover.py
+++ b/ebook/cover.py
@@ -4,7 +4,7 @@ from io import BytesIO
import textwrap
-def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30):
+def make_cover(title, author, width=600, height=800, fontname="FreeSans", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30):
img = Image.new("RGBA", (width, height), bgcolor)
draw = ImageDraw.Draw(img)
diff --git a/epub.py b/ebook/epub.py
similarity index 100%
rename from epub.py
rename to ebook/epub.py
diff --git a/leech.py b/leech.py
index 3ea9a8f..763c292 100755
--- a/leech.py
+++ b/leech.py
@@ -3,12 +3,10 @@
import argparse
import sys
import json
-import datetime
import http.cookiejar
import sites
-import epub
-import cover
+import ebook
import requests
import requests_cache
@@ -16,60 +14,6 @@ import requests_cache
__version__ = 1
USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__
-html_template = '''
-
-
- {title}
-
-
-
-{title}
-{text}
-
-
-'''
-
-cover_template = '''
-
-
- Cover
-
-
-
-
-
-
-
-
-'''
-
-frontmatter_template = '''
-
-
- Front Matter
-
-
-
-
-
{title}
By {author}
-
- - Source
- - {unique_id}
- - Started
- - {started:%Y-%m-%d}
- - Updated
- - {updated:%Y-%m-%d}
- - Downloaded on
- - {now:%Y-%m-%d}
-
-
-
-
-'''
-
def leech(url, session, filename=None, args=None):
# we have: a page, which could be absolutely any part of a story, or not a story at all
@@ -92,49 +36,7 @@ def leech(url, session, filename=None, args=None):
if not story:
raise Exception("Couldn't extract story")
- dates = list(story.dates())
- metadata = {
- 'title': story.title,
- 'author': story.author,
- 'unique_id': url,
- 'started': min(dates),
- 'updated': max(dates),
- }
-
- # The cover is static, and the only change comes from the image which we generate
- html = [('Cover', 'cover.html', cover_template)]
- cover_image = ('images/cover.png', cover.make_cover(story.title, story.author).read(), 'image/png')
-
- html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
-
- html.extend(chapter_html(story))
-
- css = ('Styles/base.css', session.get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')
-
- filename = filename or story.title + '.epub'
-
- # print([c[0:-1] for c in html])
- filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))
-
- return filename
-
-
-def chapter_html(story, titleprefix=None):
- chapters = []
- for i, chapter in enumerate(story):
- if hasattr(chapter, '__iter__'):
- # This is a Section
- chapters.extend(chapter_html(chapter, titleprefix=chapter.title))
- else:
- title = titleprefix and '{}: {}'.format(titleprefix, chapter.title) or chapter.title
- chapters.append((
- title,
- '{}/chapter{}.html'.format(story.id, i + 1),
- html_template.format(title=title, text=chapter.contents)
- ))
- if story.footnotes:
- chapters.append(("Footnotes", '{}/footnotes.html'.format(story.id), html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
- return chapters
+ return ebook.generate_epub(story, filename)
if __name__ == '__main__':
diff --git a/sites/__init__.py b/sites/__init__.py
index 5aeed41..70ab656 100644
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -25,6 +25,7 @@ class Chapter:
class Section:
title = attr.ib()
author = attr.ib()
+ url = attr.ib()
id = attr.ib(default=attr.Factory(_default_uuid_string), convert=str)
contents = attr.ib(default=attr.Factory(list))
footnotes = attr.ib(default=attr.Factory(list))
@@ -155,6 +156,7 @@ def get(url):
match = site_class.matches(url)
if match:
return site_class, match
+ raise NotImplementedError("Could not find a handler for " + url)
# And now, a particularly hacky take on a plugin system:
diff --git a/sites/ao3.py b/sites/ao3.py
index b4062ec..4523ae6 100644
--- a/sites/ao3.py
+++ b/sites/ao3.py
@@ -25,7 +25,8 @@ class ArchiveOfOurOwn(Site):
metadata = soup.select('#main h2.heading a')
story = Section(
title=metadata[0].string,
- author=metadata[1].string
+ author=metadata[1].string,
+ url='http://archiveofourown.org/works/{}'.format(workid)
)
for chapter in soup.select('#main ol[role="navigation"] li'):
diff --git a/sites/arbitrary.py b/sites/arbitrary.py
index ee06e4f..1463f14 100644
--- a/sites/arbitrary.py
+++ b/sites/arbitrary.py
@@ -58,7 +58,8 @@ class Arbitrary(Site):
title=chapter.string,
contents=self._chapter(chapter_url, definition),
# TODO: better date detection
- date=datetime.datetime.now()
+ date=datetime.datetime.now(),
+ url=url
))
else:
story.add(Chapter(
diff --git a/sites/deviantart.py b/sites/deviantart.py
index 014b030..bb2775a 100644
--- a/sites/deviantart.py
+++ b/sites/deviantart.py
@@ -29,7 +29,8 @@ class DeviantArt(Stash):
story = Section(
title=str(content.find(class_="folder-title").string),
- author=author
+ author=author,
+ url=url
)
thumbs = content.select(".stream a.thumb")
diff --git a/sites/fanfictionnet.py b/sites/fanfictionnet.py
index 505d4be..c3a6792 100644
--- a/sites/fanfictionnet.py
+++ b/sites/fanfictionnet.py
@@ -25,7 +25,8 @@ class FanFictionNet(Site):
story = Section(
title=str(metadata.find('b', class_="xcontrast_txt").string),
- author=str(metadata.find('a', class_="xcontrast_txt").string)
+ author=str(metadata.find('a', class_="xcontrast_txt").string),
+ url=url
)
dates = content.find_all('span', attrs={'data-xutime': True})
diff --git a/sites/stash.py b/sites/stash.py
index fc957ee..e7487b6 100644
--- a/sites/stash.py
+++ b/sites/stash.py
@@ -23,7 +23,8 @@ class Stash(Site):
# metadata = content.find(id='profile_top')
story = Section(
title=str(soup.find(class_="stash-folder-name").h2.string),
- author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s")
+ author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s"),
+ url=url
)
thumbs = content.select(".stash-folder-stream .thumb")
diff --git a/sites/xenforo.py b/sites/xenforo.py
index be896d2..cb0e8a1 100644
--- a/sites/xenforo.py
+++ b/sites/xenforo.py
@@ -32,7 +32,8 @@ class XenForo(Site):
story = Section(
title=soup.select('div.titleBar > h1')[0].get_text(),
- author=soup.find('p', id='pageDescription').find('a', class_='username').get_text()
+ author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(),
+ url=url
)
marks = [