diff --git a/README.markdown b/README.markdown index b35e678..759e65a 100644 --- a/README.markdown +++ b/README.markdown @@ -65,7 +65,8 @@ Example: "fontname": "Comic Sans MS", "fontsize": 30, "bgcolor": [20, 120, 20], - "textcolor": [180, 20, 180] + "textcolor": [180, 20, 180], + "cover_url": "https://website.com/image.png" } } ``` @@ -84,7 +85,8 @@ Example `practical.json`: "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } ``` @@ -92,9 +94,9 @@ Run as: $ ./leech.py practical.json -This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`. +This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`. Optionally, `cover_url` will replace the default cover with the image of your choice. -If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`. +If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`. This is a fairly viable way to extract a story from, say, a random Wordpress installation. It's relatively likely to get you at least *most* of the way to the ebook you want, with maybe some manual editing needed. 
diff --git a/ebook/__init__.py b/ebook/__init__.py index 3ea2b9c..c69d7aa 100644 --- a/ebook/__init__.py +++ b/ebook/__init__.py @@ -1,5 +1,6 @@ from .epub import make_epub from .cover import make_cover +from .cover import make_cover_from_url import datetime import requests @@ -69,6 +70,7 @@ class CoverOptions: wrapat = attr.ib(default=None, convert=attr.converters.optional(int)) bgcolor = attr.ib(default=None, convert=attr.converters.optional(tuple)) textcolor = attr.ib(default=None, convert=attr.converters.optional(tuple)) + cover_url = attr.ib(default=None, convert=attr.converters.optional(str)) def chapter_html(story, titleprefix=None): @@ -90,7 +92,7 @@ def chapter_html(story, titleprefix=None): return chapters -def generate_epub(story, output_filename=None, cover_options={}): +def generate_epub(story, cover_options={}, output_filename=None): dates = list(story.dates()) metadata = { 'title': story.title, @@ -106,7 +108,14 @@ def generate_epub(story, output_filename=None, cover_options={}): # The cover is static, and the only change comes from the image which we generate html = [('Cover', 'cover.html', cover_template)] - cover_image = ('images/cover.png', make_cover(story.title, story.author, **cover_options).read(), 'image/png') + if cover_options and cover_options["cover_url"]: + image = make_cover_from_url(cover_options["cover_url"], story.title, story.author) + elif story.cover_url: + image = make_cover_from_url(story.cover_url, story.title, story.author) + else: + image = make_cover(story.title, story.author, **cover_options) + + cover_image = ('images/cover.png', image.read(), 'image/png') html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata))) diff --git a/ebook/cover.py b/ebook/cover.py index cf56232..4420d42 100644 --- a/ebook/cover.py +++ b/ebook/cover.py @@ -2,6 +2,10 @@ from PIL import Image, ImageDraw, ImageFont from io import BytesIO import textwrap +import requests +import logging + 
logger = logging.getLogger(__name__)


def make_cover_from_url(url, title, author, timeout=30):
    """Download a cover image and return it as a PNG byte stream.

    The image at ``url`` is fetched and, if it is not already a PNG,
    re-encoded as one. Fetching a cover is best-effort: if anything goes
    wrong (network error, HTTP error status, undecodable image) the problem
    is logged and the generated default cover from ``make_cover(title,
    author)`` is used instead.

    Args:
        url: Location of the cover image.
        title: Story title, used only for the fallback cover.
        author: Story author, used only for the fallback cover.
        timeout: Seconds to wait on the server before giving up on the
            download and falling back to the generated cover.

    Returns:
        A binary stream containing the cover image. On the download path it
        is a ``BytesIO`` positioned at offset 0, ready for ``.read()``.
    """
    try:
        logger.info("Downloading cover from %s", url)
        # requests.get() opens and closes its own session, so no connection
        # pool is left dangling the way a bare requests.Session() would be.
        response = requests.get(url, timeout=timeout)
        # An error page is not a cover; treat 4xx/5xx as a failed download.
        response.raise_for_status()
        cover = BytesIO(response.content)

        if Image.open(cover).format != "PNG":
            cover = _convert_to_png(cover)
        else:
            # Image.open() consumed part of the stream while identifying the
            # format; rewind so the caller's read() returns the whole file.
            cover.seek(0)
    except Exception as e:
        # Deliberately broad: any failure here should degrade to the
        # generated cover rather than abort the ebook build. The exception
        # is passed as a logging argument because concatenating it to a str
        # raises TypeError and would defeat the fallback.
        logger.info("Encountered an error downloading cover: %s", e)
        cover = make_cover(title, author)

    return cover


def _convert_to_png(image_bytestream):
    """Re-encode the image in ``image_bytestream`` as PNG.

    Args:
        image_bytestream: Seekable binary stream holding an image in any
            format Pillow can decode.

    Returns:
        A new ``BytesIO`` named ``cover.png`` holding the PNG data,
        positioned at offset 0.
    """
    image = Image.open(image_bytestream)
    # PNG cannot store every Pillow mode (CMYK JPEGs are the common case);
    # convert those to RGB instead of letting save() fail.
    if image.mode not in ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"):
        image = image.convert("RGB")

    png_image = BytesIO()
    image.save(png_image, format="PNG")
    png_image.name = 'cover.png'
    png_image.seek(0)

    return png_image
"https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } diff --git a/examples/practical3.json b/examples/practical3.json index cc883fb..c0adc43 100644 --- a/examples/practical3.json +++ b/examples/practical3.json @@ -4,5 +4,6 @@ "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul:nth-of-type(3) > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } diff --git a/examples/practical4.json b/examples/practical4.json index 76bf606..f958064 100644 --- a/examples/practical4.json +++ b/examples/practical4.json @@ -4,5 +4,6 @@ "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul:nth-of-type(4) > li > a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } diff --git a/examples/thegodsarebastards.json b/examples/thegodsarebastards.json index 9fa80cd..701d026 100644 --- a/examples/thegodsarebastards.json +++ b/examples/thegodsarebastards.json @@ -4,5 +4,6 @@ "author": "D. D. 
Webb", "chapter_selector": "article .entry-content a[href*='20']", "content_selector": "article .entry-content", - "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']" + "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']", + "cover_url": "https://tiraas.files.wordpress.com/2016/02/classof1182byhoarous.png" } diff --git a/examples/worm.json b/examples/worm.json index a021a5e..37b6b9b 100644 --- a/examples/worm.json +++ b/examples/worm.json @@ -4,5 +4,6 @@ "author": "Wildbow", "chapter_selector": "#main .entry-content a", "content_selector": "#main .entry-content", - "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']" + "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']", + "cover_url": "https://pre00.deviantart.net/969a/th/pre/i/2015/051/8/7/worm_cover_by_cactusfantastico-d8ivj4b.png" } diff --git a/leech.py b/leech.py index bcf12d5..e4314eb 100755 --- a/leech.py +++ b/leech.py @@ -56,11 +56,13 @@ def load_on_disk_options(site): store = json.load(store_file) login = store.get('logins', {}).get(site.__name__, False) configured_site_options = store.get('site_options', {}).get(site.__name__, {}) + cover_options = store.get('cover', {}) except FileNotFoundError: logger.info("Unable to locate leech.json. 
Continuing assuming it does not exist.") login = False configured_site_options = {} - return configured_site_options, login + cover_options = {} + return configured_site_options, login, cover_options def create_options(site, site_options, unused_flags): @@ -71,7 +73,7 @@ def create_options(site, site_options, unused_flags): flag_specified_site_options = site.interpret_site_specific_options(**unused_flags) - configured_site_options, login = load_on_disk_options(site) + configured_site_options, login, cover_options = load_on_disk_options(site) overridden_site_options = json.loads(site_options) @@ -81,7 +83,8 @@ def create_options(site, site_options, unused_flags): list(default_site_options.items()) + list(configured_site_options.items()) + list(overridden_site_options.items()) + - list(flag_specified_site_options.items()) + list(flag_specified_site_options.items()) + + list(cover_options.items()) ) return options, login @@ -146,7 +149,7 @@ def download(url, site_options, cache, verbose, **other_flags): options, login = create_options(site, site_options, other_flags) story = open_story(site, url, session, login, options) - filename = ebook.generate_epub(story) + filename = ebook.generate_epub(story, options) logger.info("File created: " + filename) diff --git a/sites/__init__.py b/sites/__init__.py index 63bb0bb..313496f 100644 --- a/sites/__init__.py +++ b/sites/__init__.py @@ -30,6 +30,7 @@ class Section: title = attr.ib() author = attr.ib() url = attr.ib() + cover_url = attr.ib(default='') id = attr.ib(default=attr.Factory(_default_uuid_string), convert=str) contents = attr.ib(default=attr.Factory(list)) footnotes = attr.ib(default=attr.Factory(list)) diff --git a/sites/arbitrary.py b/sites/arbitrary.py index a7d11eb..80c2383 100644 --- a/sites/arbitrary.py +++ b/sites/arbitrary.py @@ -18,7 +18,8 @@ Example JSON: "author": "erraticerrata", "chapter_selector": "#main .entry-content > ul > li > a", "content_selector": "#main .entry-content", - "filter_selector": 
".sharedaddy, .wpcnt, style" + "filter_selector": ".sharedaddy, .wpcnt, style", + "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" } """ @@ -39,6 +40,7 @@ class SiteDefinition: next_selector = attr.ib(default=False) # If present, use to filter out content that matches the selector filter_selector = attr.ib(default=False) + cover_url = attr.ib(default='') @register @@ -58,7 +60,8 @@ class Arbitrary(Site): story = Section( title=definition.title, author=definition.author, - url=url + url=url, + cover_url=definition.cover_url ) if definition.chapter_selector: diff --git a/sites/royalroad.py b/sites/royalroad.py index e41c668..1c1d59e 100644 --- a/sites/royalroad.py +++ b/sites/royalroad.py @@ -33,7 +33,8 @@ class RoyalRoad(Site): story = Section( title=soup.find('h1', property='name').string.strip(), author=soup.find('meta', property='books:author').get('content').strip(), - url=soup.find('meta', property='og:url').get('content').strip() + url=soup.find('meta', property='og:url').get('content').strip(), + cover_url=soup.find('img', class_='thumbnail')['src'] ) for chapter in soup.select('#chapters tbody tr[data-url]'):