diff --git a/ebook/__init__.py b/ebook/__init__.py new file mode 100644 index 0000000..87a769d --- /dev/null +++ b/ebook/__init__.py @@ -0,0 +1,105 @@ +from .epub import make_epub +from .cover import make_cover + +import datetime +import requests + +html_template = ''' + + + {title} + + + +

{title}

+{text} + + +''' + +cover_template = ''' + + + Cover + + + +
+ + + +
+ + +''' + +frontmatter_template = ''' + + + Front Matter + + + +
+

{title}
By {author}

+
+
Source
+
{unique_id}
+
Started
+
{started:%Y-%m-%d}
+
Updated
+
{updated:%Y-%m-%d}
+
Downloaded on
+
{now:%Y-%m-%d}
+
+
+ + +''' + + +def chapter_html(story, titleprefix=None): + chapters = [] + for i, chapter in enumerate(story): + if hasattr(chapter, '__iter__'): + # This is a Section + chapters.extend(chapter_html(chapter, titleprefix=chapter.title)) + else: + title = titleprefix and '{}: {}'.format(titleprefix, chapter.title) or chapter.title + chapters.append(( + title, + '{}/chapter{}.html'.format(story.id, i + 1), + html_template.format(title=title, text=chapter.contents) + )) + if story.footnotes: + chapters.append(("Footnotes", '{}/footnotes.html'.format(story.id), html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes)))) + return chapters + + +def generate_epub(story, output_filename=None): + dates = list(story.dates()) + metadata = { + 'title': story.title, + 'author': story.author, + 'unique_id': story.url, + 'started': min(dates), + 'updated': max(dates), + } + + # The cover is static, and the only change comes from the image which we generate + html = [('Cover', 'cover.html', cover_template)] + + cover_image = ('images/cover.png', make_cover(story.title, story.author).read(), 'image/png') + + html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata))) + + html.extend(chapter_html(story)) + + css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css') + + output_filename = output_filename or story.title + '.epub' + + output_filename = make_epub(output_filename, html, metadata, extra_files=(css, cover_image)) + + return output_filename diff --git a/cover.py b/ebook/cover.py similarity index 91% rename from cover.py rename to ebook/cover.py index 1107101..1d21668 100644 --- a/cover.py +++ b/ebook/cover.py @@ -4,7 +4,7 @@ from io import BytesIO import textwrap -def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30): +def make_cover(title, author, width=600, height=800, fontname="FreeSans", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30): img = Image.new("RGBA", (width, height), bgcolor) draw = ImageDraw.Draw(img) diff --git a/epub.py b/ebook/epub.py similarity index 100% rename from epub.py rename to ebook/epub.py diff --git a/leech.py b/leech.py index 3ea9a8f..763c292 100755 --- a/leech.py +++ b/leech.py @@ -3,12 +3,10 @@ import argparse import sys import json -import datetime import http.cookiejar import sites -import epub -import cover +import ebook import requests import requests_cache @@ -16,60 +14,6 @@ import requests_cache __version__ = 1 USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__ -html_template = ''' - - - {title} - - - -

{title}

-{text} - - -''' - -cover_template = ''' - - - Cover - - - -
- - - -
- - -''' - -frontmatter_template = ''' - - - Front Matter - - - -
-

{title}
By {author}

-
-
Source
-
{unique_id}
-
Started
-
{started:%Y-%m-%d}
-
Updated
-
{updated:%Y-%m-%d}
-
Downloaded on
-
{now:%Y-%m-%d}
-
-
- - -''' - def leech(url, session, filename=None, args=None): # we have: a page, which could be absolutely any part of a story, or not a story at all @@ -92,49 +36,7 @@ def leech(url, session, filename=None, args=None): if not story: raise Exception("Couldn't extract story") - dates = list(story.dates()) - metadata = { - 'title': story.title, - 'author': story.author, - 'unique_id': url, - 'started': min(dates), - 'updated': max(dates), - } - - # The cover is static, and the only change comes from the image which we generate - html = [('Cover', 'cover.html', cover_template)] - cover_image = ('images/cover.png', cover.make_cover(story.title, story.author).read(), 'image/png') - - html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata))) - - html.extend(chapter_html(story)) - - css = ('Styles/base.css', session.get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css') - - filename = filename or story.title + '.epub' - - # print([c[0:-1] for c in html]) - filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image)) - - return filename - - -def chapter_html(story, titleprefix=None): - chapters = [] - for i, chapter in enumerate(story): - if hasattr(chapter, '__iter__'): - # This is a Section - chapters.extend(chapter_html(chapter, titleprefix=chapter.title)) - else: - title = titleprefix and '{}: {}'.format(titleprefix, chapter.title) or chapter.title - chapters.append(( - title, - '{}/chapter{}.html'.format(story.id, i + 1), - html_template.format(title=title, text=chapter.contents) - )) - if story.footnotes: - chapters.append(("Footnotes", '{}/footnotes.html'.format(story.id), html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes)))) - return chapters + return ebook.generate_epub(story, filename) if __name__ == '__main__': diff --git a/sites/__init__.py b/sites/__init__.py index 5aeed41..70ab656 100644 --- a/sites/__init__.py +++ b/sites/__init__.py @@ -25,6 +25,7 @@ class Chapter: class Section: title = attr.ib() author = attr.ib() + url = attr.ib() id = attr.ib(default=attr.Factory(_default_uuid_string), convert=str) contents = attr.ib(default=attr.Factory(list)) footnotes = attr.ib(default=attr.Factory(list)) @@ -155,6 +156,7 @@ def get(url): match = site_class.matches(url) if match: return site_class, match + raise NotImplementedError("Could not find a handler for " + url) # And now, a particularly hacky take on a plugin system: diff --git a/sites/ao3.py b/sites/ao3.py index b4062ec..4523ae6 100644 --- a/sites/ao3.py +++ b/sites/ao3.py @@ -25,7 +25,8 @@ class ArchiveOfOurOwn(Site): metadata = soup.select('#main h2.heading a') story = Section( title=metadata[0].string, - author=metadata[1].string + author=metadata[1].string, + url='http://archiveofourown.org/works/{}'.format(workid) ) for chapter in soup.select('#main ol[role="navigation"] li'): diff --git a/sites/arbitrary.py b/sites/arbitrary.py index ee06e4f..1463f14 100644 --- a/sites/arbitrary.py +++ b/sites/arbitrary.py @@ -58,7 +58,8 @@ class Arbitrary(Site): title=chapter.string, contents=self._chapter(chapter_url, definition), # TODO: better date detection - date=datetime.datetime.now() + date=datetime.datetime.now(), + url=url )) else: story.add(Chapter( diff --git a/sites/deviantart.py b/sites/deviantart.py index 014b030..bb2775a 100644 --- a/sites/deviantart.py +++ b/sites/deviantart.py @@ -29,7 +29,8 @@ class DeviantArt(Stash): story = Section( title=str(content.find(class_="folder-title").string), - author=author + author=author, + url=url ) thumbs = content.select(".stream a.thumb") diff --git a/sites/fanfictionnet.py b/sites/fanfictionnet.py index 505d4be..c3a6792 100644 --- a/sites/fanfictionnet.py +++ b/sites/fanfictionnet.py @@ -25,7 +25,8 @@ class FanFictionNet(Site): story = Section( title=str(metadata.find('b', class_="xcontrast_txt").string), - author=str(metadata.find('a', class_="xcontrast_txt").string) + author=str(metadata.find('a', class_="xcontrast_txt").string), + url=url ) dates = content.find_all('span', attrs={'data-xutime': True}) diff --git a/sites/stash.py b/sites/stash.py index fc957ee..e7487b6 100644 --- a/sites/stash.py +++ b/sites/stash.py @@ -23,7 +23,8 @@ class Stash(Site): # metadata = content.find(id='profile_top') story = Section( title=str(soup.find(class_="stash-folder-name").h2.string), - author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s") + author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s"), + url=url ) thumbs = content.select(".stash-folder-stream .thumb") diff --git a/sites/xenforo.py b/sites/xenforo.py index be896d2..cb0e8a1 100644 --- a/sites/xenforo.py +++ b/sites/xenforo.py @@ -32,7 +32,8 @@ class XenForo(Site): story = Section( title=soup.select('div.titleBar > h1')[0].get_text(), - author=soup.find('p', id='pageDescription').find('a', class_='username').get_text() + author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(), + url=url ) marks = [