Merge pull request #8 from Zomega/modularize

Splits out ebook generation logic into a separate module
2025-12-13 20:04:59 +01:00 · 2017-10-12 10:00:05 -05:00 · 2017-10-12 10:00:05 -05:00 · f6e4a86a50
commit f6e4a86a50
parent a92575687c 1c577b6f67
11 changed files with 122 additions and 107 deletions
--- a/ebook/__init__.py
+++ b/ebook/__init__.py
@@ -0,0 +1,105 @@
+from .epub import make_epub
+from .cover import make_cover
+
+import datetime
+import requests
+
+html_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
+<head>
+    <title>{title}</title>
+    <link rel="stylesheet" type="text/css" href="../Styles/base.css" />
+</head>
+<body>
+<h1>{title}</h1>
+{text}
+</body>
+</html>
+'''
+
+cover_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+    <title>Cover</title>
+    <link rel="stylesheet" type="text/css" href="Styles/base.css" />
+</head>
+<body>
+<div class="cover">
+<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
+    width="100%" height="100%" viewBox="0 0 573 800" preserveAspectRatio="xMidYMid meet">
+<image width="600" height="800" xlink:href="images/cover.png" />
+</svg>
+</div>
+</body>
+</html>
+'''
+
+frontmatter_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+    <title>Front Matter</title>
+    <link rel="stylesheet" type="text/css" href="Styles/base.css" />
+</head>
+<body>
+<div class="cover title">
+    <h1>{title}<br />By {author}</h1>
+    <dl>
+        <dt>Source</dt>
+        <dd>{unique_id}</dd>
+        <dt>Started</dt>
+        <dd>{started:%Y-%m-%d}</dd>
+        <dt>Updated</dt>
+        <dd>{updated:%Y-%m-%d}</dd>
+        <dt>Downloaded on</dt>
+        <dd>{now:%Y-%m-%d}</dd>
+    </dl>
+</div>
+</body>
+</html>
+'''
+
+
+def chapter_html(story, titleprefix=None):
+    chapters = []
+    for i, chapter in enumerate(story):
+        if hasattr(chapter, '__iter__'):
+            # This is a Section
+            chapters.extend(chapter_html(chapter, titleprefix=chapter.title))
+        else:
+            title = titleprefix and '{}: {}'.format(titleprefix, chapter.title) or chapter.title
+            chapters.append((
+                title,
+                '{}/chapter{}.html'.format(story.id, i + 1),
+                html_template.format(title=title, text=chapter.contents)
+            ))
+    if story.footnotes:
+        chapters.append(("Footnotes", '{}/footnotes.html'.format(story.id), html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
+    return chapters
+
+
+def generate_epub(story, output_filename=None):
+    dates = list(story.dates())
+    metadata = {
+        'title': story.title,
+        'author': story.author,
+        'unique_id': story.url,
+        'started': min(dates),
+        'updated': max(dates),
+    }
+
+    # The cover is static, and the only change comes from the image which we generate
+    html = [('Cover', 'cover.html', cover_template)]
+
+    cover_image = ('images/cover.png', make_cover(story.title, story.author).read(), 'image/png')
+
+    html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
+
+    html.extend(chapter_html(story))
+
+    css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')
+
+    output_filename = output_filename or story.title + '.epub'
+
+    output_filename = make_epub(output_filename, html, metadata, extra_files=(css, cover_image))
+
+    return output_filename
--- a/ebook/cover.py
+++ b/ebook/cover.py
@@ -4,7 +4,7 @@ from io import BytesIO
 import textwrap


-def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30):
+def make_cover(title, author, width=600, height=800, fontname="FreeSans", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30):
    img = Image.new("RGBA", (width, height), bgcolor)
    draw = ImageDraw.Draw(img)

--- a/ebook/epub.py
+++ b/ebook/epub.py
--- a/leech.py
+++ b/leech.py
@@ -3,12 +3,10 @@
 import argparse
 import sys
 import json
-import datetime
 import http.cookiejar

 import sites
-import epub
-import cover
+import ebook

 import requests
 import requests_cache
@@ -16,60 +14,6 @@ import requests_cache
 __version__ = 1
 USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__

-html_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
-<head>
-    <title>{title}</title>
-    <link rel="stylesheet" type="text/css" href="../Styles/base.css" />
-</head>
-<body>
-<h1>{title}</h1>
-{text}
-</body>
-</html>
-'''
-
-cover_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-    <title>Cover</title>
-    <link rel="stylesheet" type="text/css" href="Styles/base.css" />
-</head>
-<body>
-<div class="cover">
-<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
-    width="100%" height="100%" viewBox="0 0 573 800" preserveAspectRatio="xMidYMid meet">
-<image width="600" height="800" xlink:href="images/cover.png" />
-</svg>
-</div>
-</body>
-</html>
-'''
-
-frontmatter_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-    <title>Front Matter</title>
-    <link rel="stylesheet" type="text/css" href="Styles/base.css" />
-</head>
-<body>
-<div class="cover title">
-    <h1>{title}<br />By {author}</h1>
-    <dl>
-        <dt>Source</dt>
-        <dd>{unique_id}</dd>
-        <dt>Started</dt>
-        <dd>{started:%Y-%m-%d}</dd>
-        <dt>Updated</dt>
-        <dd>{updated:%Y-%m-%d}</dd>
-        <dt>Downloaded on</dt>
-        <dd>{now:%Y-%m-%d}</dd>
-    </dl>
-</div>
-</body>
-</html>
-'''
-

 def leech(url, session, filename=None, args=None):
    # we have: a page, which could be absolutely any part of a story, or not a story at all
@@ -92,49 +36,7 @@ def leech(url, session, filename=None, args=None):
    if not story:
        raise Exception("Couldn't extract story")

-    dates = list(story.dates())
-    metadata = {
-        'title': story.title,
-        'author': story.author,
-        'unique_id': url,
-        'started': min(dates),
-        'updated': max(dates),
-    }
-
-    # The cover is static, and the only change comes from the image which we generate
-    html = [('Cover', 'cover.html', cover_template)]
-    cover_image = ('images/cover.png', cover.make_cover(story.title, story.author).read(), 'image/png')
-
-    html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
-
-    html.extend(chapter_html(story))
-
-    css = ('Styles/base.css', session.get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')
-
-    filename = filename or story.title + '.epub'
-
-    # print([c[0:-1] for c in html])
-    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))
-
-    return filename
-
-
-def chapter_html(story, titleprefix=None):
-    chapters = []
-    for i, chapter in enumerate(story):
-        if hasattr(chapter, '__iter__'):
-            # This is a Section
-            chapters.extend(chapter_html(chapter, titleprefix=chapter.title))
-        else:
-            title = titleprefix and '{}: {}'.format(titleprefix, chapter.title) or chapter.title
-            chapters.append((
-                title,
-                '{}/chapter{}.html'.format(story.id, i + 1),
-                html_template.format(title=title, text=chapter.contents)
-            ))
-    if story.footnotes:
-        chapters.append(("Footnotes", '{}/footnotes.html'.format(story.id), html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
-    return chapters
+    return ebook.generate_epub(story, filename)


 if __name__ == '__main__':
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -25,6 +25,7 @@ class Chapter:
 class Section:
    title = attr.ib()
    author = attr.ib()
+    url = attr.ib()
    id = attr.ib(default=attr.Factory(_default_uuid_string), convert=str)
    contents = attr.ib(default=attr.Factory(list))
    footnotes = attr.ib(default=attr.Factory(list))
@@ -155,6 +156,7 @@ def get(url):
        match = site_class.matches(url)
        if match:
            return site_class, match
+    raise NotImplementedError("Could not find a handler for " + url)


 # And now, a particularly hacky take on a plugin system:
--- a/sites/ao3.py
+++ b/sites/ao3.py
@@ -25,7 +25,8 @@ class ArchiveOfOurOwn(Site):
        metadata = soup.select('#main h2.heading a')
        story = Section(
            title=metadata[0].string,
-            author=metadata[1].string
+            author=metadata[1].string,
+            url='http://archiveofourown.org/works/{}'.format(workid)
        )

        for chapter in soup.select('#main ol[role="navigation"] li'):
--- a/sites/arbitrary.py
+++ b/sites/arbitrary.py
@@ -58,7 +58,8 @@ class Arbitrary(Site):
                    title=chapter.string,
                    contents=self._chapter(chapter_url, definition),
                    # TODO: better date detection
-                    date=datetime.datetime.now()
+                    date=datetime.datetime.now(),
+                    url=url
                ))
        else:
            story.add(Chapter(
--- a/sites/deviantart.py
+++ b/sites/deviantart.py
@@ -29,7 +29,8 @@ class DeviantArt(Stash):

        story = Section(
            title=str(content.find(class_="folder-title").string),
-            author=author
+            author=author,
+            url=url
        )

        thumbs = content.select(".stream a.thumb")
--- a/sites/fanfictionnet.py
+++ b/sites/fanfictionnet.py
@@ -25,7 +25,8 @@ class FanFictionNet(Site):

        story = Section(
            title=str(metadata.find('b', class_="xcontrast_txt").string),
-            author=str(metadata.find('a', class_="xcontrast_txt").string)
+            author=str(metadata.find('a', class_="xcontrast_txt").string),
+            url=url
        )

        dates = content.find_all('span', attrs={'data-xutime': True})
--- a/sites/stash.py
+++ b/sites/stash.py
@@ -23,7 +23,8 @@ class Stash(Site):
        # metadata = content.find(id='profile_top')
        story = Section(
            title=str(soup.find(class_="stash-folder-name").h2.string),
-            author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s")
+            author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s"),
+            url=url
        )

        thumbs = content.select(".stash-folder-stream .thumb")
--- a/sites/xenforo.py
+++ b/sites/xenforo.py
@@ -32,7 +32,8 @@ class XenForo(Site):

        story = Section(
            title=soup.select('div.titleBar > h1')[0].get_text(),
-            author=soup.find('p', id='pageDescription').find('a', class_='username').get_text()
+            author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(),
+            url=url
        )

        marks = [