From fe76b5427b8231974878e49659cd1c2e8b7fa8de Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Fri, 31 Aug 2018 23:34:41 -0400 Subject: [PATCH 01/13] Add cover_url attribute --- sites/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sites/__init__.py b/sites/__init__.py index 63bb0bb..313496f 100644 --- a/sites/__init__.py +++ b/sites/__init__.py @@ -30,6 +30,7 @@ class Section: title = attr.ib() author = attr.ib() url = attr.ib() + cover_url = attr.ib(default='') id = attr.ib(default=attr.Factory(_default_uuid_string), convert=str) contents = attr.ib(default=attr.Factory(list)) footnotes = attr.ib(default=attr.Factory(list)) From 571e2627350e8490f11a421b7e3d01668fbcfa96 Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Fri, 31 Aug 2018 23:36:23 -0400 Subject: [PATCH 02/13] Find RR cover img src and assign to cover_url --- sites/royalroad.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sites/royalroad.py b/sites/royalroad.py index e41c668..1c1d59e 100644 --- a/sites/royalroad.py +++ b/sites/royalroad.py @@ -33,7 +33,8 @@ class RoyalRoad(Site): story = Section( title=soup.find('h1', property='name').string.strip(), author=soup.find('meta', property='books:author').get('content').strip(), - url=soup.find('meta', property='og:url').get('content').strip() + url=soup.find('meta', property='og:url').get('content').strip(), + cover_url=soup.find('img', class_='thumbnail')['src'] ) for chapter in soup.select('#chapters tbody tr[data-url]'): From ea60ac51223a1b56e7488eba1782a820ac9b7337 Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Sat, 1 Sep 2018 23:39:00 -0400 Subject: [PATCH 03/13] Download cover images for RoyalRoad Stories --- ebook/__init__.py | 8 +++++++- ebook/cover.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/ebook/__init__.py b/ebook/__init__.py index 3934dbc..ed52ebd 100644 --- a/ebook/__init__.py +++ b/ebook/__init__.py @@ -1,5 +1,6 @@ from .epub import make_epub from .cover import make_cover +from .cover import make_cover_from_url import datetime import requests @@ -105,7 +106,12 @@ def generate_epub(story, output_filename=None, cover_options={}): # The cover is static, and the only change comes from the image which we generate html = [('Cover', 'cover.html', cover_template)] - cover_image = ('images/cover.png', make_cover(story.title, story.author, **cover_options).read(), 'image/png') + if story.cover_url: + image = make_cover_from_url(story.cover_url, story.title, story.author) + else: + image = make_cover(story.title, story.author, **cover_options) + + cover_image = ('images/cover.png', image.read(), 'image/png') html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata))) diff --git a/ebook/cover.py b/ebook/cover.py index cf56232..3f16622 100644 --- a/ebook/cover.py +++ b/ebook/cover.py @@ -2,6 +2,7 @@ from PIL import Image, ImageDraw, ImageFont from io import BytesIO import textwrap +import requests def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30): @@ -27,6 +28,15 @@ def make_cover(title, author, width=600, height=800, fontname="Helvetica", fonts output.seek(0) return output +def make_cover_from_url(url, title, author): + try: + img = requests.Session().get(url) + cover = BytesIO(img.content) + except: + cover = make_cover(title, author) + + return cover + def _safe_font(preferred, *args, **kwargs): for font in (preferred, "Helvetica", "FreeSans", "Arial"): From ff568eef10514a975ad1d4b2ddb0c7ee2d028790 Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Sun, 2 Sep 2018 11:07:59 -0400 Subject: [PATCH 04/13] Allow arbitrary sites to include a cover url --- examples/practical1.json | 3 ++- sites/arbitrary.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/practical1.json b/examples/practical1.json index 00e1d20..214dae0 100644 --- a/examples/practical1.json +++ b/examples/practical1.json @@ -4,5 +4,6 @@ "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul:nth-of-type(1) > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } diff --git a/sites/arbitrary.py b/sites/arbitrary.py index d2864f2..2cba937 100644 --- a/sites/arbitrary.py +++ b/sites/arbitrary.py @@ -18,7 +18,8 @@ Example JSON: "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } """ @@ -33,6 +34,7 @@ class SiteDefinition: chapter_selector = attr.ib(default=False) # If this is present, it's used to filter out content that matches the selector filter_selector = attr.ib(default=False) + cover_url = attr.ib(default='') @register @@ -52,7 +54,8 @@ class Arbitrary(Site): story = Section( title=definition.title, author=definition.author, - url=url + url=url, + cover_url=definition.cover_url ) if definition.chapter_selector: From 53a3cde16db7fc9b550724614881c719e97ae880 Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Sun, 2 Sep 2018 20:24:15 -0400 Subject: [PATCH 05/13] Convert other cover image types to PNG --- ebook/cover.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ebook/cover.py b/ebook/cover.py index 3f16622..e191775 100644 --- a/ebook/cover.py +++ b/ebook/cover.py @@ -3,6 +3,7 @@ from PIL import Image, ImageDraw, ImageFont from io import BytesIO import textwrap import requests +import logging def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30): @@ -32,11 +33,21 @@ def make_cover_from_url(url, title, author): try: img = requests.Session().get(url) cover = BytesIO(img.content) + if Image.open(cover).format != "PNG": + cover = _convert_to_png(cover) except: + #logger.info("Encountered an error downloading cover, reverting to default cover") cover = make_cover(title, author) - + return cover +def _convert_to_png(image_bytestream): + img = Image.open(image_bytestream) + png_image = BytesIO() + img.save(png_image, format="PNG") + png_image.seek(0) + + return png_image def _safe_font(preferred, *args, **kwargs): for font in (preferred, "Helvetica", "FreeSans", "Arial"): From e765594e9e231f3e7d3448c48ff42c89902a5afe Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Sun, 2 Sep 2018 20:26:01 -0400 Subject: [PATCH 06/13] Add cover urls to more examples --- examples/practical2.json | 3 ++- examples/practical3.json | 3 ++- examples/practical4.json | 3 ++- examples/thegodsarebastards.json | 3 ++- examples/worm.json | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/examples/practical2.json b/examples/practical2.json index 2dfd4c9..0bb6631 100644 --- a/examples/practical2.json +++ b/examples/practical2.json @@ -4,5 +4,6 @@ "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul:nth-of-type(2) > li > ul > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } diff --git a/examples/practical3.json b/examples/practical3.json index cc883fb..c0adc43 100644 --- a/examples/practical3.json +++ b/examples/practical3.json @@ -4,5 +4,6 @@ "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul:nth-of-type(3) > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } diff --git a/examples/practical4.json b/examples/practical4.json index 76bf606..f958064 100644 --- a/examples/practical4.json +++ b/examples/practical4.json @@ -4,5 +4,6 @@ "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul:nth-of-type(4) > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } diff --git a/examples/thegodsarebastards.json b/examples/thegodsarebastards.json index 9fa80cd..701d026 100644 --- a/examples/thegodsarebastards.json +++ b/examples/thegodsarebastards.json @@ -4,5 +4,6 @@ "author": "D. D. Webb", "chapter_selector": "article .entry-content a[href*='20']", "content_selector": "article .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']" + "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']", + "cover_url": "https://tiraas.files.wordpress.com/2016/02/classof1182byhoarous.png" } diff --git a/examples/worm.json b/examples/worm.json index a021a5e..37b6b9b 100644 --- a/examples/worm.json +++ b/examples/worm.json @@ -4,5 +4,6 @@ "author": "Wildbow", "chapter_selector": "#main .entry-content a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']" + "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']", + "cover_url": "https://pre00.deviantart.net/969a/th/pre/i/2015/051/8/7/worm_cover_by_cactusfantastico-d8ivj4b.png" } From d357bd17e2979ec2fc7adc2bf7a743f5e1dceefb Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Sun, 2 Sep 2018 21:47:18 -0400 Subject: [PATCH 07/13] Clean up cover downloading and add logging --- ebook/cover.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ebook/cover.py b/ebook/cover.py index e191775..fd7377c 100644 --- a/ebook/cover.py +++ b/ebook/cover.py @@ -5,6 +5,7 @@ import textwrap import requests import logging +logger = logging.getLogger(__name__) def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30): img = Image.new("RGBA", (width, height), bgcolor) @@ -31,20 +32,22 @@ def make_cover(title, author, width=600, height=800, fontname="Helvetica", fonts def make_cover_from_url(url, title, author): try: + logger.info("Downloading cover from " + url) img = requests.Session().get(url) cover = BytesIO(img.content) + if Image.open(cover).format != "PNG": cover = _convert_to_png(cover) - except: - #logger.info("Encountered an error downloading cover, reverting to default cover") + except Exception as e: + logger.info("Encountered an error downloading cover: " + e) cover = make_cover(title, author) return cover def _convert_to_png(image_bytestream): - img = Image.open(image_bytestream) png_image = BytesIO() - img.save(png_image, format="PNG") + Image.open(image_bytestream).save(png_image, format="PNG") + png_image.name = 'cover.png' png_image.seek(0) return png_image From b25c497e468297cf6f9aa0e0e1e82585ed85de15 Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Sun, 2 Sep 2018 22:01:17 -0400 Subject: [PATCH 08/13] Describe cover_url in README --- README.markdown | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.markdown b/README.markdown index b35e678..c2bee16 100644 --- a/README.markdown +++ b/README.markdown @@ -84,7 +84,8 @@ Example `practical.json`: "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } ``` @@ -92,9 +93,9 @@ Run as: $ ./leech.py practical.json -This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`. +This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`. Optionally, `cover_url` will replace the default cover with the image of your choice. -If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`. +If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`. This is a fairly viable way to extract a story from, say, a random Wordpress installation. It's relatively likely to get you at least *most* of the way to the ebook you want, with maybe some manual editing needed. From cc29936d92e7dc1b454b019a4673afc0c9583a31 Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Sun, 2 Sep 2018 22:31:10 -0400 Subject: [PATCH 09/13] Fix whitespace --- ebook/cover.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ebook/cover.py b/ebook/cover.py index fd7377c..4420d42 100644 --- a/ebook/cover.py +++ b/ebook/cover.py @@ -7,6 +7,7 @@ import logging logger = logging.getLogger(__name__) + def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30): img = Image.new("RGBA", (width, height), bgcolor) draw = ImageDraw.Draw(img) @@ -30,6 +31,7 @@ def make_cover(title, author, width=600, height=800, fontname="Helvetica", fonts output.seek(0) return output + def make_cover_from_url(url, title, author): try: logger.info("Downloading cover from " + url) @@ -38,12 +40,13 @@ def make_cover_from_url(url, title, author): if Image.open(cover).format != "PNG": cover = _convert_to_png(cover) - except Exception as e: + except Exception as e: logger.info("Encountered an error downloading cover: " + e) cover = make_cover(title, author) return cover + def _convert_to_png(image_bytestream): png_image = BytesIO() Image.open(image_bytestream).save(png_image, format="PNG") @@ -52,6 +55,7 @@ def _convert_to_png(image_bytestream): return png_image + def _safe_font(preferred, *args, **kwargs): for font in (preferred, "Helvetica", "FreeSans", "Arial"): try: From f2fc2c11dbf2209f3bb68761e393c900966a844a Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Mon, 10 Sep 2018 23:02:03 -0400 Subject: [PATCH 10/13] Capture cover options from leech.json and pass them to generate_epub() --- ebook/__init__.py | 3 ++- leech.py | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ebook/__init__.py b/ebook/__init__.py index ed52ebd..4a25801 100644 --- a/ebook/__init__.py +++ b/ebook/__init__.py @@ -70,6 +70,7 @@ class CoverOptions: wrapat = attr.ib(default=None, convert=attr.converters.optional(int)) bgcolor = attr.ib(default=None, convert=attr.converters.optional(tuple)) textcolor = attr.ib(default=None, convert=attr.converters.optional(tuple)) + cover_url = attr.ib(default=None, convert=attr.converters.optional(str)) def chapter_html(story, titleprefix=None): @@ -90,7 +91,7 @@ def chapter_html(story, titleprefix=None): return chapters -def generate_epub(story, output_filename=None, cover_options={}): +def generate_epub(story, cover_options={}, output_filename=None): dates = list(story.dates()) metadata = { 'title': story.title, diff --git a/leech.py b/leech.py index bcf12d5..d906613 100755 --- a/leech.py +++ b/leech.py @@ -56,11 +56,12 @@ def load_on_disk_options(site): store = json.load(store_file) login = store.get('logins', {}).get(site.__name__, False) configured_site_options = store.get('site_options', {}).get(site.__name__, {}) + cover_options = store.get('cover') except FileNotFoundError: logger.info("Unable to locate leech.json. Continuing assuming it does not exist.") login = False configured_site_options = {} - return configured_site_options, login + return configured_site_options, login, cover_options def create_options(site, site_options, unused_flags): @@ -71,7 +72,7 @@ def create_options(site, site_options, unused_flags): flag_specified_site_options = site.interpret_site_specific_options(**unused_flags) - configured_site_options, login = load_on_disk_options(site) + configured_site_options, login, cover_options = load_on_disk_options(site) overridden_site_options = json.loads(site_options) @@ -81,7 +82,8 @@ def create_options(site, site_options, unused_flags): list(default_site_options.items()) + list(configured_site_options.items()) + list(overridden_site_options.items()) + - list(flag_specified_site_options.items()) + list(flag_specified_site_options.items()) + + list(cover_options.items()) ) return options, login @@ -146,7 +148,7 @@ def download(url, site_options, cache, verbose, **other_flags): options, login = create_options(site, site_options, other_flags) story = open_story(site, url, session, login, options) - filename = ebook.generate_epub(story) + filename = ebook.generate_epub(story, options) logger.info("File created: " + filename) From 1f57305e11215d4813a89a28e8d1c868abfd9bce Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Mon, 10 Sep 2018 23:13:26 -0400 Subject: [PATCH 11/13] Download cover image if cover_url is in json --- ebook/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ebook/__init__.py b/ebook/__init__.py index 4a25801..dbb125b 100644 --- a/ebook/__init__.py +++ b/ebook/__init__.py @@ -107,7 +107,9 @@ def generate_epub(story, cover_options={}, output_filename=None): # The cover is static, and the only change comes from the image which we generate html = [('Cover', 'cover.html', cover_template)] - if story.cover_url: + if cover_options["cover_url"]: + image = make_cover_from_url(cover_options["cover_url"], story.title, story.author) + elif story.cover_url: image = make_cover_from_url(story.cover_url, story.title, story.author) else: image = make_cover(story.title, story.author, **cover_options) From 0c37727219eabea2baca0daddcd82317414cb2a2 Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Mon, 10 Sep 2018 23:43:56 -0400 Subject: [PATCH 12/13] Add example of cover_url to readme --- README.markdown | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index c2bee16..759e65a 100644 --- a/README.markdown +++ b/README.markdown @@ -65,7 +65,8 @@ Example: "fontname": "Comic Sans MS", "fontsize": 30, "bgcolor": [20, 120, 20], - "textcolor": [180, 20, 180] + "textcolor": [180, 20, 180], + "cover_url": "https://website.com/image.png" } } ``` From 60084534a841868c117fa57828fa41f42321ab2e Mon Sep 17 00:00:00 2001 From: Alex Raubach Date: Sat, 15 Sep 2018 11:03:52 -0400 Subject: [PATCH 13/13] Create empty dict when leech.json not present --- ebook/__init__.py | 2 +- leech.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ebook/__init__.py b/ebook/__init__.py index dbb125b..dddaaa1 100644 --- a/ebook/__init__.py +++ b/ebook/__init__.py @@ -107,7 +107,7 @@ def generate_epub(story, cover_options={}, output_filename=None): # The cover is static, and the only change comes from the image which we generate html = [('Cover', 'cover.html', cover_template)] - if cover_options["cover_url"]: + if cover_options and cover_options["cover_url"]: image = make_cover_from_url(cover_options["cover_url"], story.title, story.author) elif story.cover_url: image = make_cover_from_url(story.cover_url, story.title, story.author) diff --git a/leech.py b/leech.py index d906613..e4314eb 100755 --- a/leech.py +++ b/leech.py @@ -56,11 +56,12 @@ def load_on_disk_options(site): store = json.load(store_file) login = store.get('logins', {}).get(site.__name__, False) configured_site_options = store.get('site_options', {}).get(site.__name__, {}) - cover_options = store.get('cover') + cover_options = store.get('cover', {}) except FileNotFoundError: logger.info("Unable to locate leech.json. Continuing assuming it does not exist.") login = False configured_site_options = {} + cover_options = {} return configured_site_options, login, cover_options