Merge pull request #16 from AlexRaubach/covers

Download cover art from RR and arbitrary sites
2026-02-14 18:52:32 +01:00 · 2018-10-01 12:18:39 -05:00 · 2018-10-01 12:18:39 -05:00 · 02bd6ae0c6
commit 02bd6ae0c6
parent f6acf4a73b 60084534a8
13 changed files with 72 additions and 19 deletions
--- a/README.markdown
+++ b/README.markdown
@ -65,7 +65,8 @@ Example:
        "fontname": "Comic Sans MS",
        "fontsize": 30,
        "bgcolor": [20, 120, 20],
-        "textcolor": [180, 20, 180]
+        "textcolor": [180, 20, 180],
+        "cover_url": "https://website.com/image.png"
    }
 }
 ```
@ -84,7 +85,8 @@ Example `practical.json`:
    "author": "erraticerrata",
    "chapter_selector": "#main .entry-content > ul > li > a",
    "content_selector": "#main .entry-content",
-    "filter_selector": ".sharedaddy, .wpcnt, style"
+    "filter_selector": ".sharedaddy, .wpcnt, style",
+    "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
 }
 ```

@ -92,9 +94,9 @@ Run as:

    $ ./leech.py practical.json

-This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`.
+This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`. Optionally, set `cover_url` to replace the default generated cover with an image of your choice.

-If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`.
+If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`. 

 This is a fairly viable way to extract a story from, say, a random Wordpress installation. It's relatively likely to get you at least *most* of the way to the ebook you want, with maybe some manual editing needed.

--- a/ebook/init.py
+++ b/ebook/init.py
@ -1,5 +1,6 @@
 from .epub import make_epub
 from .cover import make_cover
+from .cover import make_cover_from_url

 import datetime
 import requests
@ -69,6 +70,7 @@ class CoverOptions:
    wrapat = attr.ib(default=None, convert=attr.converters.optional(int))
    bgcolor = attr.ib(default=None, convert=attr.converters.optional(tuple))
    textcolor = attr.ib(default=None, convert=attr.converters.optional(tuple))
+    cover_url = attr.ib(default=None, convert=attr.converters.optional(str))


 def chapter_html(story, titleprefix=None):
@ -90,7 +92,7 @@ def chapter_html(story, titleprefix=None):
    return chapters


-def generate_epub(story, output_filename=None, cover_options={}):
+def generate_epub(story, cover_options={}, output_filename=None):
    dates = list(story.dates())
    metadata = {
        'title': story.title,
@ -106,7 +108,14 @@ def generate_epub(story, output_filename=None, cover_options={}):
    # The cover is static, and the only change comes from the image which we generate
    html = [('Cover', 'cover.html', cover_template)]

-    cover_image = ('images/cover.png', make_cover(story.title, story.author, **cover_options).read(), 'image/png')
+    if cover_options and cover_options["cover_url"]:
+        image = make_cover_from_url(cover_options["cover_url"], story.title, story.author)
+    elif story.cover_url:
+        image = make_cover_from_url(story.cover_url, story.title, story.author)
+    else:
+        image = make_cover(story.title, story.author, **cover_options)
+
+    cover_image = ('images/cover.png', image.read(), 'image/png')

    html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))

--- a/ebook/cover.py
+++ b/ebook/cover.py
@ -2,6 +2,10 @@
 from PIL import Image, ImageDraw, ImageFont
 from io import BytesIO
 import textwrap
+import requests
+import logging
+
+logger = logging.getLogger(__name__)


 def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30):
@ -28,6 +32,30 @@ def make_cover(title, author, width=600, height=800, fontname="Helvetica", fonts
    return output


+def make_cover_from_url(url, title, author):
+    try:
+        logger.info("Downloading cover from " + url)
+        img = requests.Session().get(url)
+        cover = BytesIO(img.content)
+
+        if Image.open(cover).format != "PNG":
+            cover = _convert_to_png(cover)
+    except Exception as e:
+        logger.info("Encountered an error downloading cover: " + e)
+        cover = make_cover(title, author)
+
+    return cover
+
+
+def _convert_to_png(image_bytestream):
+    png_image = BytesIO()
+    Image.open(image_bytestream).save(png_image, format="PNG")
+    png_image.name = 'cover.png'
+    png_image.seek(0)
+
+    return png_image
+
+
 def _safe_font(preferred, *args, **kwargs):
    for font in (preferred, "Helvetica", "FreeSans", "Arial"):
        try:
--- a/examples/practical1.json
+++ b/examples/practical1.json
@ -4,5 +4,6 @@
 	"author": "erraticerrata",
 	"chapter_selector": "#main .entry-content > ul:nth-of-type(1) > li > a",
 	"content_selector": "#main .entry-content",
-	"filter_selector": ".sharedaddy, .wpcnt, style"
+	"filter_selector": ".sharedaddy, .wpcnt, style",
+	"cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
 }
--- a/examples/practical2.json
+++ b/examples/practical2.json
@ -4,5 +4,6 @@
    "author": "erraticerrata",
    "chapter_selector": "#main .entry-content > ul:nth-of-type(2) > li > ul > li > a",
    "content_selector": "#main .entry-content",
-    "filter_selector": ".sharedaddy, .wpcnt, style"
+    "filter_selector": ".sharedaddy, .wpcnt, style",
+    "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
 }
--- a/examples/practical3.json
+++ b/examples/practical3.json
@ -4,5 +4,6 @@
    "author": "erraticerrata",
    "chapter_selector": "#main .entry-content > ul:nth-of-type(3) > li > a",
    "content_selector": "#main .entry-content",
-    "filter_selector": ".sharedaddy, .wpcnt, style"
+    "filter_selector": ".sharedaddy, .wpcnt, style",
+    "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
 }
--- a/examples/practical4.json
+++ b/examples/practical4.json
@ -4,5 +4,6 @@
    "author": "erraticerrata",
    "chapter_selector": "#main .entry-content > ul:nth-of-type(4) > li > a",
    "content_selector": "#main .entry-content",
-    "filter_selector": ".sharedaddy, .wpcnt, style"
+    "filter_selector": ".sharedaddy, .wpcnt, style",
+    "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
 }
--- a/examples/thegodsarebastards.json
+++ b/examples/thegodsarebastards.json
@ -4,5 +4,6 @@
  "author": "D. D. Webb",
  "chapter_selector": "article .entry-content a[href*='20']",
  "content_selector": "article .entry-content",
-  "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']"
+  "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']",
+  "cover_url": "https://tiraas.files.wordpress.com/2016/02/classof1182byhoarous.png"
 }
--- a/examples/worm.json
+++ b/examples/worm.json
@ -4,5 +4,6 @@
 	"author": "Wildbow",
 	"chapter_selector": "#main .entry-content  a",
 	"content_selector": "#main .entry-content",
-	"filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']"
+	"filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']",
+	"cover_url": "https://pre00.deviantart.net/969a/th/pre/i/2015/051/8/7/worm_cover_by_cactusfantastico-d8ivj4b.png"
 }
--- a/leech.py
+++ b/leech.py
@ -56,11 +56,13 @@ def load_on_disk_options(site):
            store = json.load(store_file)
            login = store.get('logins', {}).get(site.__name__, False)
            configured_site_options = store.get('site_options', {}).get(site.__name__, {})
+            cover_options = store.get('cover', {})
    except FileNotFoundError:
        logger.info("Unable to locate leech.json. Continuing assuming it does not exist.")
        login = False
        configured_site_options = {}
-    return configured_site_options, login
+        cover_options = {}
+    return configured_site_options, login, cover_options


 def create_options(site, site_options, unused_flags):
@ -71,7 +73,7 @@ def create_options(site, site_options, unused_flags):

    flag_specified_site_options = site.interpret_site_specific_options(**unused_flags)

-    configured_site_options, login = load_on_disk_options(site)
+    configured_site_options, login, cover_options = load_on_disk_options(site)

    overridden_site_options = json.loads(site_options)

@ -81,7 +83,8 @@ def create_options(site, site_options, unused_flags):
        list(default_site_options.items()) +
        list(configured_site_options.items()) +
        list(overridden_site_options.items()) +
-        list(flag_specified_site_options.items())
+        list(flag_specified_site_options.items()) +
+        list(cover_options.items())
    )
    return options, login

@ -146,7 +149,7 @@ def download(url, site_options, cache, verbose, **other_flags):
    options, login = create_options(site, site_options, other_flags)
    story = open_story(site, url, session, login, options)

-    filename = ebook.generate_epub(story)
+    filename = ebook.generate_epub(story, options)
    logger.info("File created: " + filename)


--- a/sites/init.py
+++ b/sites/init.py
@ -30,6 +30,7 @@ class Section:
    title = attr.ib()
    author = attr.ib()
    url = attr.ib()
+    cover_url = attr.ib(default='')
    id = attr.ib(default=attr.Factory(_default_uuid_string), convert=str)
    contents = attr.ib(default=attr.Factory(list))
    footnotes = attr.ib(default=attr.Factory(list))
--- a/sites/arbitrary.py
+++ b/sites/arbitrary.py
@ -18,7 +18,8 @@ Example JSON:
    "author": "erraticerrata",
    "chapter_selector": "#main .entry-content > ul > li > a",
    "content_selector": "#main .entry-content",
-    "filter_selector": ".sharedaddy, .wpcnt, style"
+    "filter_selector": ".sharedaddy, .wpcnt, style",
+    "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
 }
 """

@ -39,6 +40,7 @@ class SiteDefinition:
    next_selector = attr.ib(default=False)
    # If present, use to filter out content that matches the selector
    filter_selector = attr.ib(default=False)
+    cover_url = attr.ib(default='')


@register
@ -58,7 +60,8 @@ class Arbitrary(Site):
        story = Section(
            title=definition.title,
            author=definition.author,
-            url=url
+            url=url,
+            cover_url=definition.cover_url
        )

        if definition.chapter_selector:
--- a/sites/royalroad.py
+++ b/sites/royalroad.py
@ -33,7 +33,8 @@ class RoyalRoad(Site):
        story = Section(
            title=soup.find('h1', property='name').string.strip(),
            author=soup.find('meta', property='books:author').get('content').strip(),
-            url=soup.find('meta', property='og:url').get('content').strip()
+            url=soup.find('meta', property='og:url').get('content').strip(),
+            cover_url=soup.find('img', class_='thumbnail')['src']
        )

        for chapter in soup.select('#chapters tbody tr[data-url]'):