Mirror of https://github.com/kemayo/leech (synced 2025-12-06 16:33:16 +01:00)

feat: Leech can now compress images to a specific target size

parent 55e400b535
commit 34bf962df6

4 changed files with 111 additions and 11 deletions
@@ -84,6 +84,8 @@ Example:
     },
     "images": true,
     "image_format": "png",
+    "compress_images": true,
+    "max_image_size": 100000,
     "cover": {
         "fontname": "Comic Sans MS",
         "fontsize": 30,
@@ -106,6 +108,24 @@ Example:
 > Note: If the `image_format` key does not exist, Leech will default to `jpeg`.
 > The three image formats are `jpeg`, `png`, and `gif`. The `image_format` key is case-insensitive.

+> Note: The `compress_images` key tells Leech to compress images. This is only supported for `jpeg` and `png` images.
+> It goes hand-in-hand with the `max_image_size` key. If `compress_images` is `true` but there is no `max_image_size` key,
+> Leech will compress each image to less than 1MB (1000000 bytes). If the `max_image_size` key is present, Leech will compress the image
+> to less than the value of `max_image_size`, which is given in bytes.
+> If `compress_images` is `false`, Leech will ignore the `max_image_size` key.
+
+> Warning: Compressing images can make image downloads take noticeably longer.
+
+> Warning: Compressing images can reduce image quality.
+
+> Warning: `max_image_size` is not a hard limit. Leech will try to compress the image to roughly that size, but it might
+> not land on the exact value.
+
+> Warning: `max_image_size` should not be too small. For instance, if you set `max_image_size` to 1000, Leech will probably not be able to
+> compress an image down to 1000 bytes, whereas a target of 1000000 bytes is usually achievable.
+
+> Warning: Leech will not compress GIFs, as compression could damage the animation.
+
 Arbitrary Sites
 ---

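The reason `max_image_size` is only approximate: the new `compress_image()` helper further down in this diff turns the byte budget into a pixel budget using a fixed estimate (2.8114 pixels per target byte) and resizes the image once, rather than iterating until the encoded file hits the target. A minimal sketch of that arithmetic (not from the commit itself; dimensions and target are illustrative, only the constant is taken from the commit):

```python
import math

def estimate_new_dimensions(width: int, height: int, target_size_bytes: int):
    # Mirrors the heuristic in compress_image(): budget ~2.8114 pixels per target byte.
    target_pixel_count = 2.8114 * target_size_bytes
    scale_factor = target_pixel_count / math.prod((width, height))
    if scale_factor >= 1:
        return width, height  # already within the pixel budget; left untouched
    return int(scale_factor * width), int(scale_factor * height)

# Illustrative only: a 2000x3000 source with the README's 100000-byte target.
print(estimate_new_dimensions(2000, 3000, 100_000))  # -> (93, 140)
```
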
@@ -79,7 +79,15 @@ class CoverOptions:
     cover_url = attr.ib(default=None, converter=attr.converters.optional(str))


-def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None, normalize=False):
+def chapter_html(
+    story,
+    image_bool=False,
+    image_format="JPEG",
+    compress_images=False,
+    max_image_size=1_000_000,
+    titleprefix=None,
+    normalize=False
+):
     chapters = []
     for i, chapter in enumerate(story):
         title = chapter.title or f'#{i}'
@@ -99,7 +107,7 @@ def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None,
                 print(f"Image {count} has no src attribute, skipping...")
                 continue
             print(f"[Chapter {i}] Image ({count+1} out of {len_of_all_images}). Source: ", end="")
-            img_contents = get_image_from_url(img['src'], image_format)
+            img_contents = get_image_from_url(img['src'], image_format, compress_images, max_image_size)
             chapter.images.append(Image(
                 path=f"images/ch{i}_leechimage_{count}.{img_contents[1]}",
                 contents=img_contents[0],
@@ -145,7 +153,12 @@ def chapter_html(story, image_bool=False, image_format="JPEG", titleprefix=None,

 def generate_epub(story, cover_options={}, image_options=None, output_filename=None, output_dir=None, normalize=False):
     if image_options is None:
-        image_options = {'image_bool': False, 'image_format': 'JPEG'}
+        image_options = {
+            'image_bool': False,
+            'image_format': 'JPEG',
+            'compress_images': False,
+            'max_image_size': 1_000_000
+        }
     dates = list(story.dates())
     metadata = {
         'title': story.title,
@@ -192,6 +205,8 @@ def generate_epub(story, cover_options={}, image_options=None, output_filename=
             story,
             image_bool=image_options.get('image_bool'),
             image_format=image_options.get('image_format'),
+            compress_images=image_options.get('compress_images'),
+            max_image_size=image_options.get('max_image_size'),
             normalize=normalize
         ),
         EpubFile(

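For orientation, a hedged sketch (not from the commit) of how a caller could opt into compression through the new `image_options` keys; the `ebook` module name matches how leech.py refers to `generate_epub()`, and `story` stands in for whatever the site scrapers produce:

```python
import ebook  # module providing generate_epub(); name as used by leech.py

def build_epub_with_compression(story):
    # `story` is whatever the site scrapers in this repo return; treated as opaque here.
    return ebook.generate_epub(
        story,
        image_options={
            'image_bool': True,          # fetch and embed images at all
            'image_format': 'JPEG',      # fallback format for unsupported image types
            'compress_images': True,     # new in this commit
            'max_image_size': 200_000,   # approximate per-image target, in bytes
        },
    )
```
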
@@ -3,6 +3,7 @@ import PIL
 from PIL import Image, ImageDraw, ImageFont
 from io import BytesIO
 from base64 import b64decode
+import math
 import textwrap
 import requests
 import logging
@@ -44,6 +45,44 @@ def make_image(
     return output


+def get_size_format(b, factor=1000, suffix="B"):
+    """
+    Scale bytes to its proper byte format
+    e.g:
+    1253656 => '1.20MB'
+    1253656678 => '1.17GB'
+    """
+    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
+        if b < factor:
+            return f"{b:.2f}{unit}{suffix}"
+        b /= factor
+    return f"{b:.2f}Y{suffix}"
+
+
+def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image:
+    image_size = get_size_format(len(image.getvalue()))
+    logger.info(f"Image size: {image_size}")
+
+    big_photo = Image.open(image).convert("RGBA")
+
+    target_pixel_count = 2.8114 * target_size
+    if len(image.getvalue()) > target_size:
+        logger.info(f"Image is greater than {get_size_format(target_size)}, compressing")
+        scale_factor = target_pixel_count / math.prod(big_photo.size)
+        if scale_factor < 1:
+            x, y = tuple(int(scale_factor * dim) for dim in big_photo.size)
+            logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})")
+            sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS)
+        else:
+            sml_photo = big_photo
+        compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format)))
+        logger.info(f"Compressed image size: {compressed_image_size}")
+        return sml_photo
+    else:
+        logger.info(f"Image is less than {get_size_format(target_size)}, not compressing")
+        return big_photo
+
+
 def PIL_Image_to_bytes(
     pil_image: PIL.Image.Image,
     image_format: str
@@ -74,13 +113,20 @@ def PIL_Image_to_bytes(
     return out_io.getvalue()


-def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str, str]:
+def get_image_from_url(
+    url: str,
+    image_format: str = "JPEG",
+    compress_images: bool = False,
+    max_image_size: int = 1_000_000
+) -> Tuple[bytes, str, str]:
     """
     Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of
     an image tag and returns the image data, the image format and the image mime type

     @param url: The url of the image
     @param image_format: The format to convert the image to if it's not in the supported formats
+    @param compress_images: Whether to compress the image or not
+    @param max_image_size: The maximum size of the image in bytes
     @return: A tuple of the image data, the image format and the image mime type
     """
     try:
@@ -90,8 +136,15 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
         elif url.startswith("data:image") and 'base64' in url:
             logger.info("Base64 image detected")
             head, base64data = url.split(',')
-            file_ext = head.split(';')[0].split('/')[1]
+            file_ext = str(head.split(';')[0].split('/')[1])
             imgdata = b64decode(base64data)
+            if compress_images:
+                if file_ext.lower() == "gif":
+                    logger.info("GIF images should not be compressed, skipping compression")
+                else:
+                    compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, file_ext)
+                    imgdata = PIL_Image_to_bytes(compressed_base64_image, file_ext)
+
             if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]:
                 logger.info(f"Image format {file_ext} not supported by EPUB2.0.1, converting to {image_format}")
                 return _convert_to_new_format(imgdata, image_format).read(), image_format.lower(), f"image/{image_format.lower()}"
@@ -103,7 +156,7 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
         image.seek(0)

         PIL_image = Image.open(image)
-        img_format = PIL_image.format
+        img_format = str(PIL_image.format)

         if img_format.lower() == "gif":
             PIL_image = Image.open(image)
@@ -111,6 +164,9 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
             PIL_image.info['version'] = b"GIF89a"
             return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"

+        if compress_images:
+            PIL_image = compress_image(image, max_image_size, img_format)
+
         return PIL_Image_to_bytes(PIL_image, image_format), image_format, f"image/{image_format.lower()}"

     except Exception as e:
@@ -119,7 +175,7 @@ def get_image_from_url(url: str, image_format: str = "JPEG") -> Tuple[bytes, str
         return cover, "jpeg", "image/jpeg"


-def _convert_to_new_format(image_bytestream, image_format):
+def _convert_to_new_format(image_bytestream, image_format: str):
     new_image = BytesIO()
     try:
         Image.open(image_bytestream).save(new_image, format=image_format.upper())

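A small usage sketch (not part of the commit) of how the new helpers compose, mirroring the calls `get_image_from_url()` now makes for downloaded images; it assumes `get_size_format()`, `compress_image()` and `PIL_Image_to_bytes()` from this module are in scope, and the file path is hypothetical:

```python
from io import BytesIO

def shrink_to_target(path: str, target_bytes: int = 100_000) -> bytes:
    # Load a local file into a BytesIO, the same shape of input that
    # get_image_from_url() hands to compress_image() for downloaded data.
    with open(path, "rb") as f:
        buf = BytesIO(f.read())

    print("original:", get_size_format(len(buf.getvalue())))

    # Resize toward the byte target, then re-encode the PIL image to raw bytes.
    pil_img = compress_image(buf, target_bytes, "JPEG")
    jpeg_bytes = PIL_Image_to_bytes(pil_img, "JPEG")

    print("re-encoded:", get_size_format(len(jpeg_bytes)))
    return jpeg_bytes
```
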
leech.py
@@ -60,6 +60,8 @@ def load_on_disk_options(site):
         login = store.get('logins', {}).get(site.site_key(), False)
         image_bool: bool = store.get('images', False)
         image_format: str = store.get('image_format', 'jpeg')
+        compress_images: bool = store.get('compress_images', False)
+        max_image_size: int = store.get('max_image_size', 1_000_000)
         configured_site_options = store.get('site_options', {}).get(site.site_key(), {})
         cover_options = store.get('cover', {})
         output_dir = store.get('output_dir', False)
@@ -68,12 +70,14 @@ def load_on_disk_options(site):
         login = False
         image_bool = False
         image_format = 'jpeg'
+        compress_images = False
+        max_image_size = 1_000_000
         configured_site_options = {}
         cover_options = {}
         output_dir = False
     if output_dir and 'output_dir' not in configured_site_options:
         configured_site_options['output_dir'] = output_dir
-    return configured_site_options, login, cover_options, image_bool, image_format
+    return configured_site_options, login, cover_options, image_bool, image_format, compress_images, max_image_size


 def create_options(site, site_options, unused_flags):
@@ -84,7 +88,7 @@ def create_options(site, site_options, unused_flags):

     flag_specified_site_options = site.interpret_site_specific_options(**unused_flags)

-    configured_site_options, login, cover_options, image_bool, image_format = load_on_disk_options(site)
+    configured_site_options, login, cover_options, image_bool, image_format, compress_images, max_image_size = load_on_disk_options(site)

     overridden_site_options = json.loads(site_options)

@@ -96,7 +100,7 @@ def create_options(site, site_options, unused_flags):
         list(overridden_site_options.items()) +
         list(flag_specified_site_options.items()) +
         list(cover_options.items()) +
-        list({'image_bool': image_bool, 'image_format': image_format}.items())
+        list({'image_bool': image_bool, 'image_format': image_format, 'compress_images': compress_images, 'max_image_size': max_image_size}.items())
     )
     return options, login

@@ -174,7 +178,12 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, **other_
         if story:
             filename = ebook.generate_epub(
                 story, options,
-                image_options={'image_bool': options['image_bool'], 'image_format': options['image_format'] or 'jpeg'},
+                image_options={
+                    'image_bool': options['image_bool'] or False,
+                    'image_format': options['image_format'] or 'jpeg',
+                    'compress_images': options['compress_images'] or False,
+                    'max_image_size': options['max_image_size'] or 1_000_000
+                },
                 normalize=normalize,
                 output_dir=output_dir or options.get('output_dir', os.getcwd())
             )
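To tie the pieces together, a hedged sketch (not from the commit) of the lookups `load_on_disk_options()` now performs; the store contents mirror the README example and the fallbacks match the defaults above:

```python
# Conceptual slice of the on-disk options store (values mirror the README example).
store = {
    "images": True,
    "image_format": "png",
    "compress_images": True,
    "max_image_size": 100000,   # bytes
}

# Same lookups and fallbacks as load_on_disk_options() in this diff.
image_bool = store.get('images', False)
image_format = store.get('image_format', 'jpeg')
compress_images = store.get('compress_images', False)
max_image_size = store.get('max_image_size', 1_000_000)
```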