mirror of https://github.com/kemayo/leech synced 2025-12-06 16:33:16 +01:00

feat: Leech can now compress images to a specific target size

Emmanuel Jemeni 2023-04-03 17:26:57 +01:00 committed by David Lynch
parent 55e400b535
commit 34bf962df6
4 changed files with 111 additions and 11 deletions

View file

@@ -84,6 +84,8 @@ Example:
},
"images": true,
"image_format": "png",
+"compress_images": true,
+"max_image_size": 100000,
"cover": {
"fontname": "Comic Sans MS",
"fontsize": 30,
@@ -106,6 +108,24 @@ Example:
> Note: If the `image_format` key does not exist, Leech will default to `jpeg`.
> The three image formats are `jpeg`, `png`, and `gif`. The `image_format` key is case-insensitive.
+> Note: The `compress_images` key tells Leech to compress images. Only `jpeg` and `png` images are compressed.
+> It works together with the `max_image_size` key, whose value is a size in bytes. If `compress_images` is `true` but
+> `max_image_size` is not set, Leech targets a size under 1MB (1000000 bytes); otherwise it targets a size under `max_image_size`.
+> If `compress_images` is `false`, the `max_image_size` key is ignored.
+> Warning: Compressing images can make downloading them take considerably longer.
+> Warning: Compressing images can reduce image quality.
+> Warning: `max_image_size` is a target, not a hard limit; Leech tries to get close to it, but may not hit the exact size.
+> Warning: Do not set `max_image_size` too low. A value like 1000 bytes is usually unreachable, while 1000000 bytes usually is.
+> Warning: GIFs are never compressed, because compression could break their animation.
Arbitrary Sites
---

View file

@@ -79,7 +79,15 @@ class CoverOptions:
cover_url = attr.ib(default=None, converter=attr.converters.optional(str))
-def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None, normalize=False):
+def chapter_html(
+    story,
+    image_bool=False,
+    image_format="JPEG",
+    compress_images=False,
+    max_image_size=1_000_000,
+    titleprefix=None,
+    normalize=False
+):
chapters = []
for i, chapter in enumerate(story):
title = chapter.title or f'#{i}'
@@ -99,7 +107,7 @@ def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None,
print(f"Image {count} has no src attribute, skipping...")
continue
print(f"[Chapter {i}] Image ({count+1} out of {len_of_all_images}). Source: ", end="")
-img_contents = get_image_from_url(img['src'], image_format)
+img_contents = get_image_from_url(img['src'], image_format, compress_images, max_image_size)
chapter.images.append(Image(
path=f"images/ch{i}_leechimage_{count}.{img_contents[1]}",
contents=img_contents[0],
@@ -145,7 +153,12 @@ def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None,
def generate_epub(story, cover_options={}, image_options=None, output_filename=None, output_dir=None, normalize=False):
if image_options is None:
-image_options = {'image_bool': False, 'image_format': 'JPEG'}
+image_options = {
+    'image_bool': False,
+    'image_format': 'JPEG',
+    'compress_images': False,
+    'max_image_size': 1_000_000
+}
dates = list(story.dates())
metadata = {
'title': story.title,
@@ -192,6 +205,8 @@ def generate_epub(story, cover_options={}, image_options=None, output_filename=
story,
image_bool=image_options.get('image_bool'),
image_format=image_options.get('image_format'),
+compress_images=image_options.get('compress_images'),
+max_image_size=image_options.get('max_image_size'),
normalize=normalize
),
EpubFile(
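For orientation, here is how a script could pass the new image_options keys straight to generate_epub, mirroring what the download command does further below. The wrapper function, the story argument, and the concrete values are illustrative assumptions; only the option keys come from this commit.

import ebook

def build_epub(story):
    # `story` is a leech story object produced elsewhere; this wrapper is only a sketch.
    return ebook.generate_epub(
        story,
        image_options={
            'image_bool': True,           # fetch and embed images at all
            'image_format': 'JPEG',       # fallback format for unsupported image types
            'compress_images': True,      # enable the new downscaling step
            'max_image_size': 500_000,    # soft target of roughly 500 KB per image
        },
    )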

View file

@@ -3,6 +3,7 @@ import PIL
from PIL import Image, ImageDraw, ImageFont
from io import BytesIO
from base64 import b64decode
+import math
import textwrap
import requests
import logging
@@ -44,6 +45,44 @@ def make_image(
return output
+def get_size_format(b, factor=1000, suffix="B"):
+    """
+    Scale bytes to its proper byte format
+    e.g.:
+    1253656 => '1.25MB'
+    1253656678 => '1.25GB'
+    """
+    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
+        if b < factor:
+            return f"{b:.2f}{unit}{suffix}"
+        b /= factor
+    return f"{b:.2f}Y{suffix}"
+def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image:
+    image_size = get_size_format(len(image.getvalue()))
+    logger.info(f"Image size: {image_size}")
+    big_photo = Image.open(image).convert("RGBA")
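+    # The constant below looks like an empirical pixel budget: roughly 2.81 pixels are
+    # allowed per target byte (about 0.36 bytes per encoded pixel) when picking new dimensions.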
+    target_pixel_count = 2.8114 * target_size
+    if len(image.getvalue()) > target_size:
+        logger.info(f"Image is greater than {get_size_format(target_size)}, compressing")
+        scale_factor = target_pixel_count / math.prod(big_photo.size)
+        if scale_factor < 1:
+            x, y = tuple(int(scale_factor * dim) for dim in big_photo.size)
+            logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})")
+            sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS)
+        else:
+            sml_photo = big_photo
+        compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format)))
+        logger.info(f"Compressed image size: {compressed_image_size}")
+        return sml_photo
+    else:
+        logger.info(f"Image is less than {get_size_format(target_size)}, not compressing")
+        return big_photo
def PIL_Image_to_bytes(
pil_image: PIL.Image.Image,
image_format: str
@@ -74,13 +113,20 @@ def PIL_Image_to_bytes(
return out_io.getvalue()
-def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str, str]:
+def get_image_from_url(
+    url: str,
+    image_format: str = "JPEG",
+    compress_images: bool = False,
+    max_image_size: int = 1_000_000
+) -> Tuple[bytes, str, str]:
"""
Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of
an image tag and returns the image data, the image format and the image mime type
@param url: The url of the image
@param image_format: The format to convert the image to if it's not in the supported formats
+@param compress_images: Whether to compress the image or not
+@param max_image_size: The maximum size of the image in bytes
@return: A tuple of the image data, the image format and the image mime type
"""
try:
@@ -90,8 +136,15 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
elif url.startswith("data:image") and 'base64' in url:
logger.info("Base64 image detected")
head, base64data = url.split(',')
-file_ext = head.split(';')[0].split('/')[1]
+file_ext = str(head.split(';')[0].split('/')[1])
imgdata = b64decode(base64data)
+if compress_images:
+    if file_ext.lower() == "gif":
+        logger.info("GIF images should not be compressed, skipping compression")
+    else:
+        compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, file_ext)
+        imgdata = PIL_Image_to_bytes(compressed_base64_image, file_ext)
if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]:
logger.info(f"Image format {file_ext} not supported by EPUB2.0.1, converting to {image_format}")
return _convert_to_new_format(imgdata, image_format).read(), image_format.lower(), f"image/{image_format.lower()}"
@@ -103,7 +156,7 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
image.seek(0)
PIL_image = Image.open(image)
-img_format = PIL_image.format
+img_format = str(PIL_image.format)
if img_format.lower() == "gif":
PIL_image = Image.open(image)
@@ -111,6 +164,9 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
PIL_image.info['version'] = b"GIF89a"
return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"
+if compress_images:
+    PIL_image = compress_image(image, max_image_size, img_format)
return PIL_Image_to_bytes(PIL_image, image_format), image_format, f"image/{image_format.lower()}"
except Exception as e:
@@ -119,7 +175,7 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
return cover, "jpeg", "image/jpeg"
-def _convert_to_new_format(image_bytestream, image_format):
+def _convert_to_new_format(image_bytestream, image_format: str):
new_image = BytesIO()
try:
Image.open(image_bytestream).save(new_image, format=image_format.upper())
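As a quick usage sketch, the new parameters can be exercised directly. The import path and the URL below are assumptions made for illustration; the function name, its arguments, and the returned tuple come from the diff above.

# Sketch: fetch one image and ask for compression toward a ~200 KB soft target.
# The module path `image` is assumed; adjust the import to the project's actual layout.
from image import get_image_from_url

data, ext, mime = get_image_from_url(
    "https://example.com/large-photo.png",   # hypothetical URL
    image_format="PNG",
    compress_images=True,
    max_image_size=200_000,
)
print(f"{mime}: {len(data)} bytes, stored with extension .{ext}")

Note that compress_image scales width and height by the same ratio, so the pixel count drops by the square of that ratio; combined with the soft byte target, the result usually lands below max_image_size rather than exactly at it.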

View file

@@ -60,6 +60,8 @@ def load_on_disk_options(site):
login = store.get('logins', {}).get(site.site_key(), False)
image_bool: bool = store.get('images', False)
image_format: str = store.get('image_format', 'jpeg')
+compress_images: bool = store.get('compress_images', False)
+max_image_size: int = store.get('max_image_size', 1_000_000)
configured_site_options = store.get('site_options', {}).get(site.site_key(), {})
cover_options = store.get('cover', {})
output_dir = store.get('output_dir', False)
@@ -68,12 +70,14 @@ def load_on_disk_options(site):
login = False
image_bool = False
image_format = 'jpeg'
+compress_images = False
+max_image_size = 1_000_000
configured_site_options = {}
cover_options = {}
output_dir = False
if output_dir and 'output_dir' not in configured_site_options:
configured_site_options['output_dir'] = output_dir
-return configured_site_options, login, cover_options, image_bool, image_format
+return configured_site_options, login, cover_options, image_bool, image_format, compress_images, max_image_size
def create_options(site, site_options, unused_flags):
@@ -84,7 +88,7 @@ def create_options(site, site_options, unused_flags):
flag_specified_site_options = site.interpret_site_specific_options(**unused_flags)
-configured_site_options, login, cover_options, image_bool, image_format = load_on_disk_options(site)
+configured_site_options, login, cover_options, image_bool, image_format, compress_images, max_image_size = load_on_disk_options(site)
overridden_site_options = json.loads(site_options)
@@ -96,7 +100,7 @@ def create_options(site, site_options, unused_flags):
list(overridden_site_options.items()) +
list(flag_specified_site_options.items()) +
list(cover_options.items()) +
-list({'image_bool': image_bool, 'image_format': image_format}.items())
+list({'image_bool': image_bool, 'image_format': image_format, 'compress_images': compress_images, 'max_image_size': max_image_size}.items())
)
return options, login
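Because dict() keeps the last value it sees for a duplicate key, the image options appended at the end of that concatenated list override any same-named keys coming from the earlier dictionaries. A small, self-contained illustration (the values are made up):

# Later (key, value) pairs win when dict() consumes the concatenated list,
# so the image options assembled last take precedence over same-named keys.
site_level = {'image_format': 'png', 'output_dir': 'books'}
image_level = {'image_bool': True, 'image_format': 'jpeg',
               'compress_images': True, 'max_image_size': 1_000_000}
options = dict(list(site_level.items()) + list(image_level.items()))
print(options['image_format'])   # -> 'jpeg'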
@@ -174,7 +178,12 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, **other_
if story:
filename = ebook.generate_epub(
story, options,
-image_options={'image_bool': options['image_bool'], 'image_format': options['image_format'] or 'jpeg'},
+image_options={
+    'image_bool': options['image_bool'] or False,
+    'image_format': options['image_format'] or 'jpeg',
+    'compress_images': options['compress_images'] or False,
+    'max_image_size': options['max_image_size'] or 1_000_000
+},
normalize=normalize,
output_dir=output_dir or options.get('output_dir', os.getcwd())
)