1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2025-12-06 16:33:16 +01:00

feat: Leech can now compress images to a specific target size

This commit is contained in:
Emmanuel Jemeni 2023-04-03 17:26:57 +01:00 committed by David Lynch
parent 55e400b535
commit 34bf962df6
4 changed files with 111 additions and 11 deletions

View file

@ -84,6 +84,8 @@ Example:
},
"images": true,
"image_format": "png",
"compress_images": true,
"max_image_size": 100000,
"cover": {
"fontname": "Comic Sans MS",
"fontsize": 30,
@ -106,6 +108,24 @@ Example:
> Note: If the `image_format` key does not exist, Leech will default to `jpeg`.
> The three image formats are `jpeg`, `png`, and `gif`. The `image_format` key is case-insensitive.
> Note: The `compress_images` key tells Leech to compress images. This is only supported for `jpeg` and `png` images.
> This also goes hand-in-hand with the `max_image_size` key. If the `compress_images` key is `true` but there's no `max_image_size` key,
> Leech will compress the image to a size less than 1MB (1000000 bytes). If the `max_image_size` key is present, Leech will compress the image
> to a size less than the value of the `max_image_size` key. The `max_image_size` key is in bytes.
> If `compress_images` is `false`, Leech will ignore the `max_image_size` key.
> Warning: Compressing images might make Leech take a lot longer to download images.
> Warning: Compressing images might make the image quality worse.
> Warning: `max_image_size` is not a hard limit. Leech will try to compress the image to the size of the `max_image_size` key, but Leech might
> not be able to compress the image to the exact size of the `max_image_size` key.
> Warning: `max_image_size` should not be too small. For instance, if you set `max_image_size` to 1000, Leech will probably not be able to
> compress the image to 1000 bytes. If you set `max_image_size` to 1000000, Leech will probably be able to compress the image to 1000000 bytes.
> Warning: Leech will not compress GIFs, because doing so might damage the animation.
Arbitrary Sites
---

View file

@ -79,7 +79,15 @@ class CoverOptions:
cover_url = attr.ib(default=None, converter=attr.converters.optional(str))
def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None, normalize=False):
def chapter_html(
story,
image_bool=False,
image_format="JPEG",
compress_images=False,
max_image_size=1_000_000,
titleprefix=None,
normalize=False
):
chapters = []
for i, chapter in enumerate(story):
title = chapter.title or f'#{i}'
@ -99,7 +107,7 @@ def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None,
print(f"Image {count} has no src attribute, skipping...")
continue
print(f"[Chapter {i}] Image ({count+1} out of {len_of_all_images}). Source: ", end="")
img_contents = get_image_from_url(img['src'], image_format)
img_contents = get_image_from_url(img['src'], image_format, compress_images, max_image_size)
chapter.images.append(Image(
path=f"images/ch{i}_leechimage_{count}.{img_contents[1]}",
contents=img_contents[0],
@ -145,7 +153,12 @@ def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None,
def generate_epub(story, cover_options={}, image_options=None, output_filename=None, output_dir=None, normalize=False):
if image_options is None:
image_options = {'image_bool': False, 'image_format': 'JPEG'}
image_options = {
'image_bool': False,
'image_format': 'JPEG',
'compress_images': False,
'max_image_size': 1_000_000
}
dates = list(story.dates())
metadata = {
'title': story.title,
@ -192,6 +205,8 @@ def generate_epub(story, cover_options={}, image_options=None, output_filename=
story,
image_bool=image_options.get('image_bool'),
image_format=image_options.get('image_format'),
compress_images=image_options.get('compress_images'),
max_image_size=image_options.get('max_image_size'),
normalize=normalize
),
EpubFile(

View file

@ -3,6 +3,7 @@ import PIL
from PIL import Image, ImageDraw, ImageFont
from io import BytesIO
from base64 import b64decode
import math
import textwrap
import requests
import logging
@ -44,6 +45,44 @@ def make_image(
return output
def get_size_format(b, factor=1000, suffix="B"):
    """
    Render a byte count as a short human-readable string with two decimals.

    The value is divided by `factor` until it drops below `factor`, picking
    the matching prefix along the way. Anything still too large after the
    "Z" prefix is reported with "Y".
    e.g. (with the default factor of 1000):
        1253656 => '1.25MB'
        1253656678 => '1.25GB'
    e.g. (with factor=1024):
        1253656 => '1.20MB'
        1253656678 => '1.17GB'
    """
    scaled = b
    # "Y" is the fallback used when none of the smaller prefixes fit.
    chosen_prefix = "Y"
    for candidate in ("", "K", "M", "G", "T", "P", "E", "Z"):
        if scaled < factor:
            chosen_prefix = candidate
            break
        scaled /= factor
    return f"{scaled:.2f}{chosen_prefix}{suffix}"
def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image:
    """
    Downscale an image so that its encoded size lands near `target_size` bytes.

    The image is always decoded and converted to RGBA. If the raw input is
    already no larger than `target_size` it is returned at its original
    dimensions; otherwise its pixel dimensions are reduced (aspect ratio
    preserved) towards an estimated pixel budget for the target size.

    @param image: The encoded image data
    @param target_size: The desired maximum size of the image in bytes
    @param image_format: The format used to measure (and log) the size of the result
    @return: The (possibly resized) image as an RGBA PIL image
    """
    original_byte_count = len(image.getvalue())
    logger.info(f"Image size: {get_size_format(original_byte_count)}")

    big_photo = Image.open(image).convert("RGBA")

    if original_byte_count <= target_size:
        logger.info(f"Image is less than {get_size_format(target_size)}, not compressing")
        return big_photo

    logger.info(f"Image is greater than {get_size_format(target_size)}, compressing")

    # NOTE(review): 2.8114 looks like an empirical pixels-per-byte estimate;
    # its origin is not shown in this file.
    target_pixel_count = 2.8114 * target_size
    scale_factor = target_pixel_count / math.prod(big_photo.size)

    if scale_factor < 1:
        # scale_factor is a ratio of pixel *counts* (areas). Each side has to be
        # scaled by its square root; multiplying both sides by scale_factor
        # itself would shrink the area by scale_factor squared and overshoot.
        side_scale = math.sqrt(max(scale_factor, 0.0))
        # Clamp to 1px so a tiny target never asks for a zero-sized image.
        x, y = (max(1, int(side_scale * dim)) for dim in big_photo.size)
        logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})")
        sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS)
    else:
        sml_photo = big_photo

    compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format)))
    logger.info(f"Compressed image size: {compressed_image_size}")
    return sml_photo
def PIL_Image_to_bytes(
pil_image: PIL.Image.Image,
image_format: str
@ -74,13 +113,20 @@ def PIL_Image_to_bytes(
return out_io.getvalue()
def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str, str]:
def get_image_from_url(
url: str,
image_format: str = "JPEG",
compress_images: bool = False,
max_image_size: int = 1_000_000
) -> Tuple[bytes, str, str]:
"""
Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of
an image tag and returns the image data, the image format and the image mime type
@param url: The url of the image
@param image_format: The format to convert the image to if it's not in the supported formats
@param compress_images: Whether to compress the image or not
@param max_image_size: The maximum size of the image in bytes
@return: A tuple of the image data, the image format and the image mime type
"""
try:
@ -90,8 +136,15 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
elif url.startswith("data:image") and 'base64' in url:
logger.info("Base64 image detected")
head, base64data = url.split(',')
file_ext = head.split(';')[0].split('/')[1]
file_ext = str(head.split(';')[0].split('/')[1])
imgdata = b64decode(base64data)
if compress_images:
if file_ext.lower() == "gif":
logger.info("GIF images should not be compressed, skipping compression")
else:
compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, file_ext)
imgdata = PIL_Image_to_bytes(compressed_base64_image, file_ext)
if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]:
logger.info(f"Image format {file_ext} not supported by EPUB2.0.1, converting to {image_format}")
return _convert_to_new_format(imgdata, image_format).read(), image_format.lower(), f"image/{image_format.lower()}"
@ -103,7 +156,7 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
image.seek(0)
PIL_image = Image.open(image)
img_format = PIL_image.format
img_format = str(PIL_image.format)
if img_format.lower() == "gif":
PIL_image = Image.open(image)
@ -111,6 +164,9 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
PIL_image.info['version'] = b"GIF89a"
return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"
if compress_images:
PIL_image = compress_image(image, max_image_size, img_format)
return PIL_Image_to_bytes(PIL_image, image_format), image_format, f"image/{image_format.lower()}"
except Exception as e:
@ -119,7 +175,7 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
return cover, "jpeg", "image/jpeg"
def _convert_to_new_format(image_bytestream, image_format):
def _convert_to_new_format(image_bytestream, image_format: str):
new_image = BytesIO()
try:
Image.open(image_bytestream).save(new_image, format=image_format.upper())

View file

@ -60,6 +60,8 @@ def load_on_disk_options(site):
login = store.get('logins', {}).get(site.site_key(), False)
image_bool: bool = store.get('images', False)
image_format: str = store.get('image_format', 'jpeg')
compress_images: bool = store.get('compress_images', False)
max_image_size: int = store.get('max_image_size', 1_000_000)
configured_site_options = store.get('site_options', {}).get(site.site_key(), {})
cover_options = store.get('cover', {})
output_dir = store.get('output_dir', False)
@ -68,12 +70,14 @@ def load_on_disk_options(site):
login = False
image_bool = False
image_format = 'jpeg'
compress_images = False
max_image_size = 1_000_000
configured_site_options = {}
cover_options = {}
output_dir = False
if output_dir and 'output_dir' not in configured_site_options:
configured_site_options['output_dir'] = output_dir
return configured_site_options, login, cover_options, image_bool, image_format
return configured_site_options, login, cover_options, image_bool, image_format, compress_images, max_image_size
def create_options(site, site_options, unused_flags):
@ -84,7 +88,7 @@ def create_options(site, site_options, unused_flags):
flag_specified_site_options = site.interpret_site_specific_options(**unused_flags)
configured_site_options, login, cover_options, image_bool, image_format = load_on_disk_options(site)
configured_site_options, login, cover_options, image_bool, image_format, compress_images, max_image_size = load_on_disk_options(site)
overridden_site_options = json.loads(site_options)
@ -96,7 +100,7 @@ def create_options(site, site_options, unused_flags):
list(overridden_site_options.items()) +
list(flag_specified_site_options.items()) +
list(cover_options.items()) +
list({'image_bool': image_bool, 'image_format': image_format}.items())
list({'image_bool': image_bool, 'image_format': image_format, 'compress_images': compress_images, 'max_image_size': max_image_size }.items())
)
return options, login
@ -174,7 +178,12 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, **other_
if story:
filename = ebook.generate_epub(
story, options,
image_options={'image_bool': options['image_bool'], 'image_format': options['image_format'] or 'jpeg'},
image_options={
'image_bool': options['image_bool'] or False,
'image_format': options['image_format'] or 'jpeg',
'compress_images': options['compress_images'] or False,
'max_image_size': options['max_image_size'] or 1_000_000
},
normalize=normalize,
output_dir=output_dir or options.get('output_dir', os.getcwd())
)