1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2025-12-15 21:05:59 +01:00

fix: Completely fixes #2 !

This commit is contained in:
Emmanuel Jemeni 2023-04-03 16:09:43 +01:00 committed by David Lynch
parent f6dc5a9ad9
commit e6ad77a9fc
4 changed files with 158 additions and 57 deletions

View file

@ -49,6 +49,27 @@ Supports
* Sta.sh
* Completely arbitrary sites, with a bit more work (see below)
Images support
---
Leech creates EPUB 2.0.1 files, which means that Leech can only save images in the following
formats:
- JPEG (JPG/JFIF)
- PNG
- GIF
See the [Open Publication Structure (OPS) 2.0.1](https://idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#TOC2.3.4) for more information.
Leech cannot save SVG images because Pillow does not support the SVG format.
Leech uses [Pillow](https://pillow.readthedocs.io/en/stable/index.html) for image manipulation and conversion. If you want to use a different
image format, you will need to install the dependencies Pillow requires for that format, and you will probably have to tinker with Leech as well. See the [Pillow documentation](https://pillow.readthedocs.io/en/stable/installation.html#external-libraries) for more information.
By default, Leech will try to save all non-animated images as JPEG, because JPEG files are small.
The only animated images that Leech will save are GIFs.
To configure image support, you will need to create a file called `leech.json`. See the section below for more information.
Configuration
---
@ -61,6 +82,8 @@ Example:
"logins": {
"QuestionableQuesting": ["username", "password"]
},
"images": true,
"image_format": "png",
"cover": {
"fontname": "Comic Sans MS",
"fontsize": 30,
@ -76,6 +99,12 @@ Example:
}
}
```
> Note: The `images` key is a boolean and can only be `true` or `false`. Booleans in JSON are written in lowercase.
> If it is `false`, Leech will not download any images.
> Leech will also ignore the `image_format` key if `images` is `false`.
> Note: If the `image_format` key does not exist, Leech will default to `jpeg`.
> The three supported image formats are `jpeg`, `png`, and `gif`. The value of the `image_format` key is case-insensitive.
Arbitrary Sites
---

View file

@ -79,7 +79,7 @@ class CoverOptions:
cover_url = attr.ib(default=None, converter=attr.converters.optional(str))
def chapter_html(story, titleprefix=None, normalize=False):
def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None, normalize=False):
chapters = []
for i, chapter in enumerate(story):
title = chapter.title or f'#{i}'
@ -89,34 +89,42 @@ def chapter_html(story, titleprefix=None, normalize=False):
chapter, titleprefix=title, normalize=normalize))
else:
soup = BeautifulSoup(chapter.contents, 'html5lib')
all_images = soup.find_all('img')
len_of_all_images = len(all_images)
print(f"\nFound {len_of_all_images} images in chapter {i}\n")
if image_bool:
all_images = soup.find_all('img')
len_of_all_images = len(all_images)
print(f"Found {len_of_all_images} images in chapter {i}")
for count, img in enumerate(all_images):
if not img.has_attr('src'):
print(f"Image {count} has no src attribute, skipping...")
continue
print(f"Downloading image {count+1} out of {len_of_all_images} from chapter {i}")
img_contents = get_image_from_url(img['src']).read()
chapter.images.append(Image(
path=f"images/ch{i}_leechimage_{count}.png",
contents=img_contents,
content_type='image/png'
))
img['src'] = f"../images/ch{i}_leechimage_{count}.png"
if not img.has_attr('alt'):
img['alt'] = f"Image {count} from chapter {i}"
# Add all pictures on this chapter as well.
for image in chapter.images:
# For/else syntax, check if the image path already exists, if it doesn't add the image.
# Duplicates are not allowed in the format.
for other_file in chapters:
if other_file.path == image.path:
break
else:
chapters.append(EpubFile(
path=image.path, contents=image.contents, filetype=image.content_type))
for count, img in enumerate(all_images):
if not img.has_attr('src'):
print(f"Image {count} has no src attribute, skipping...")
continue
print(f"[Chapter {i}] Image ({count+1} out of {len_of_all_images}). Source: ", end="")
img_contents = get_image_from_url(img['src'], image_format)
chapter.images.append(Image(
path=f"images/ch{i}_leechimage_{count}.{img_contents[1]}",
contents=img_contents[0],
content_type=img_contents[2]
))
img['src'] = f"../images/ch{i}_leechimage_{count}.{img_contents[1]}"
if not img.has_attr('alt'):
img['alt'] = f"Image {count} from chapter {i}"
# Add all pictures on this chapter as well.
for image in chapter.images:
# For/else syntax, check if the image path already exists, if it doesn't add the image.
# Duplicates are not allowed in the format.
for other_file in chapters:
if other_file.path == image.path:
break
else:
chapters.append(EpubFile(
path=image.path, contents=image.contents, filetype=image.content_type))
else:
# Remove all images from the chapter so you don't get that annoying grey background.
for img in soup.find_all('img'):
if img.parent.name.lower() == "figure":
img.parent.decompose()
else:
img.decompose()
title = titleprefix and f'{titleprefix}: {title}' or title
contents = str(soup)
@ -135,7 +143,9 @@ def chapter_html(story, titleprefix=None, normalize=False):
return chapters
def generate_epub(story, cover_options={}, output_filename=None, output_dir=None, normalize=False):
def generate_epub(story, cover_options={}, image_options=None, output_filename=None, output_dir=None, normalize=False):
if image_options is None:
image_options = {'image_bool': False, 'image_format': 'JPEG'}
dates = list(story.dates())
metadata = {
'title': story.title,
@ -178,7 +188,12 @@ def generate_epub(story, cover_options={}, output_filename=None, output_dir=None
EpubFile(title='Cover', path='cover.html', contents=cover_template),
EpubFile(title='Front Matter', path='frontmatter.html', contents=frontmatter_template.format(
now=datetime.datetime.now(), **metadata)),
*chapter_html(story, normalize=normalize),
*chapter_html(
story,
image_bool=image_options.get('image_bool'),
image_format=image_options.get('image_format'),
normalize=normalize
),
EpubFile(
path='Styles/base.css',
contents=requests.Session().get(

View file

@ -1,10 +1,14 @@
# Basically the same as cover.py with some minor differences
import PIL
from PIL import Image, ImageDraw, ImageFont
from io import BytesIO
from base64 import b64decode
import textwrap
import requests
import logging
from typing import Tuple
logger = logging.getLogger(__name__)
@ -21,7 +25,7 @@ def make_image(
"""
This function should only be called if get_image_from_url() fails
"""
img = Image.new("RGBA", (width, height), bg_color)
img = Image.new("RGB", (width, height), bg_color)
draw = ImageDraw.Draw(img)
message = textwrap.fill(message, wrap_at)
@ -33,46 +37,93 @@ def make_image(
# draw.text(((width - title_size[0]) / 2, 100), title, textcolor, font=font)
output = BytesIO()
img.save(output, "PNG")
output.name = 'cover.png'
img.save(output, "JPEG")
output.name = 'cover.jpeg'
# writing left the cursor at the end of the file, so reset it
output.seek(0)
return output
def get_image_from_url(url: str):
def PIL_Image_to_bytes(
    pil_image: PIL.Image.Image,
    image_format: str
) -> bytes:
    """
    Encode a Pillow image in the requested format and return the raw bytes.

    GIF output is assembled frame by frame so that animations are kept; every
    other format is written with a single save() call.

    @param pil_image: The opened Pillow image to encode
    @param image_format: The target format name, in any letter case ("jpg" is treated as "jpeg")
    @return: The encoded image data
    """
    # Pillow registers its JPEG encoder only under the name "JPEG"; handing
    # "jpg" to save() fails, so normalise the alias before it is used.
    if image_format.lower() in ("jpeg", "jpg"):
        image_format = "JPEG"

    out_io = BytesIO()

    if image_format.lower().startswith("gif"):
        frames = []
        current = pil_image.convert('RGBA')
        while True:
            try:
                frames.append(current)
                # seek() raises EOFError once there are no more frames.
                pil_image.seek(pil_image.tell() + 1)
                # Composite the next frame onto the previous result so each
                # stored frame is a full picture.
                current = Image.alpha_composite(current, pil_image.convert('RGBA'))
            except EOFError:
                break
        frames[0].save(out_io, format=image_format, save_all=True, append_images=frames[1:], optimize=True, loop=0)
        return out_io.getvalue()

    if image_format == "JPEG":
        # JPEG has no alpha channel, so flatten to RGB before saving.
        pil_image = pil_image.convert("RGB")

    pil_image.save(out_io, format=image_format, optimize=True, quality=95)
    return out_io.getvalue()
def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str, str]:
"""
Basically the same as make_cover_from_url()
Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of
an image tag and returns the image data, the image format and the image mime type
@param url: The url of the image
@param image_format: The format to convert the image to if it's not in the supported formats
@return: A tuple of the image data, the image format and the image mime type
"""
try:
if url.startswith("https://www.filepicker.io/api/"):
logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
url = f"https://cdn3.fiction.live/fp/{url.split('/')[-1]}?&quality=95"
elif url.startswith("data:image") and 'base64' in url:
logger.info("Base64 image detected")
head, base64data = url.split(',')
file_ext = head.split(';')[0].split('/')[1]
imgdata = b64decode(base64data)
if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]:
logger.info(f"Image format {file_ext} not supported by EPUB2.0.1, converting to {image_format}")
return _convert_to_new_format(imgdata, image_format).read(), image_format.lower(), f"image/{image_format.lower()}"
return imgdata, file_ext, f"image/{file_ext}"
print(url)
img = requests.Session().get(url)
logger.info("Downloading image from " + url)
cover = BytesIO(img.content)
image = BytesIO(img.content)
image.seek(0)
img_format = Image.open(cover).format
# The `Image.open` read a few bytes from the stream to work out the
# format, so reset it:
cover.seek(0)
PIL_image = Image.open(image)
img_format = PIL_image.format
if img_format.lower() == "gif":
PIL_image = Image.open(image)
if PIL_image.info['version'] not in [b"GIF89a", "GIF89a"]:
PIL_image.info['version'] = b"GIF89a"
return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"
return PIL_Image_to_bytes(PIL_image, image_format), image_format, f"image/{image_format.lower()}"
if img_format != "PNG":
cover = _convert_to_png(cover)
except Exception as e:
logger.info("Encountered an error downloading cover: " + str(e))
cover = make_image("There was a problem downloading this image.")
return cover
logger.info("Encountered an error downloading image: " + str(e))
cover = make_image("There was a problem downloading this image.").read()
return cover, "jpeg", "image/jpeg"
def _convert_to_png(image_bytestream):
    """
    Re-encode the image held in `image_bytestream` as PNG.

    @param image_bytestream: A file-like object that Image.open() can read
    @return: A BytesIO named 'cover.png', rewound to the start
    """
    source = Image.open(image_bytestream)
    buffer = BytesIO()
    source.save(buffer, format="PNG")
    buffer.name = 'cover.png'
    # Saving leaves the cursor at the end of the buffer, so rewind it.
    buffer.seek(0)
    return buffer
def _convert_to_new_format(image_bytestream, image_format):
    """
    Re-encode the image held in `image_bytestream` into `image_format`.

    @param image_bytestream: A file-like object that Image.open() can read
    @param image_format: The target format name, in any letter case ("jpg" is treated as "jpeg")
    @return: A BytesIO rewound to the start; if the conversion fails, the
             placeholder image produced by make_image() is returned instead
    """
    # Pillow registers its JPEG encoder only under the name "JPEG".
    pil_format = image_format.upper()
    if pil_format == "JPG":
        pil_format = "JPEG"

    new_image = BytesIO()
    try:
        source = Image.open(image_bytestream)
        if pil_format == "JPEG" and source.mode not in ("RGB", "L", "CMYK"):
            # JPEG cannot store alpha or palette data; without this step such
            # images would fail to save and fall through to the placeholder.
            source = source.convert("RGB")
        source.save(new_image, format=pil_format)
        new_image.name = f'cover.{image_format.lower()}'
        # Saving leaves the cursor at the end of the buffer, so rewind it.
        new_image.seek(0)
    except Exception as e:
        # Deliberate best-effort: any failure yields a placeholder image
        # rather than aborting the whole download.
        logger.info(f"Encountered an error converting image to {image_format}\nError: {e}")
        new_image = make_image("There was a problem converting this image.")
    return new_image
def _safe_font(preferred, *args, **kwargs):

View file

@ -58,18 +58,22 @@ def load_on_disk_options(site):
with open('leech.json') as store_file:
store = json.load(store_file)
login = store.get('logins', {}).get(site.site_key(), False)
image_bool: bool = store.get('images', False)
image_format: str = store.get('image_format', 'jpeg')
configured_site_options = store.get('site_options', {}).get(site.site_key(), {})
cover_options = store.get('cover', {})
output_dir = store.get('output_dir', False)
except FileNotFoundError:
logger.info("Unable to locate leech.json. Continuing assuming it does not exist.")
login = False
image_bool = False
image_format = 'jpeg'
configured_site_options = {}
cover_options = {}
output_dir = False
if output_dir and 'output_dir' not in configured_site_options:
configured_site_options['output_dir'] = output_dir
return configured_site_options, login, cover_options
return configured_site_options, login, cover_options, image_bool, image_format
def create_options(site, site_options, unused_flags):
@ -80,7 +84,7 @@ def create_options(site, site_options, unused_flags):
flag_specified_site_options = site.interpret_site_specific_options(**unused_flags)
configured_site_options, login, cover_options = load_on_disk_options(site)
configured_site_options, login, cover_options, image_bool, image_format = load_on_disk_options(site)
overridden_site_options = json.loads(site_options)
@ -91,7 +95,8 @@ def create_options(site, site_options, unused_flags):
list(configured_site_options.items()) +
list(overridden_site_options.items()) +
list(flag_specified_site_options.items()) +
list(cover_options.items())
list(cover_options.items()) +
list({'image_bool': image_bool, 'image_format': image_format}.items())
)
return options, login
@ -169,6 +174,7 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, **other_
if story:
filename = ebook.generate_epub(
story, options,
image_options={'image_bool': options['image_bool'], 'image_format': options['image_format'] or 'jpeg'},
normalize=normalize,
output_dir=output_dir or options.get('output_dir', os.getcwd())
)