Pass through some more headers in the session

2026-03-19 10:51:35 +01:00 · 2024-12-17 14:41:33 -06:00 · 2024-12-17 14:41:33 -06:00 · 3fdbae5851
commit 3fdbae5851
parent 204807add6
3 changed files with 28 additions and 11 deletions
--- a/ebook/__init__.py
+++ b/ebook/__init__.py
@@ -7,7 +7,6 @@ from bs4 import BeautifulSoup
 import html
 import unicodedata
 import datetime
-import requests
 from attrs import define, asdict

 html_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
@@ -91,7 +90,8 @@ def chapter_html(
    story,
    image_options,
    titleprefix=None,
-    normalize=False
+    normalize=False,
+    session=None
 ):
    already_fetched_images = {}
    chapters = []
@@ -100,7 +100,7 @@ def chapter_html(
        if hasattr(chapter, '__iter__'):
            # This is a Section
            chapters.extend(chapter_html(
-                chapter, image_options=image_options, titleprefix=title, normalize=normalize
+                chapter, image_options=image_options, titleprefix=title, normalize=normalize, session=session
            ))
        else:
            soup = BeautifulSoup(chapter.contents, 'html5lib')
@@ -118,7 +118,8 @@ def chapter_html(
                            image_format=image_options.get('image_format'),
                            compress_images=image_options.get('compress_images'),
                            max_image_size=image_options.get('max_image_size'),
-                            always_convert=image_options.get('always_convert_images')
+                            always_convert=image_options.get('always_convert_images'),
+                            session=session
                        )
                        chapter.images.append(Image(
                            path=f"images/ch{i}_leechimage_{count}.{img_contents[1]}",
@@ -169,7 +170,7 @@ def chapter_html(
    return chapters


-def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False):
+def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False, session=None):
    dates = list(story.dates())
    metadata = {
        'title': story.title,
@@ -181,6 +182,14 @@ def generate_epub(story, cover_options={}, image_options={}, output_filename=Non
    }
    extra_metadata = {}

+    session.headers.update({
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0',
+    })
+    if story.url:
+        session.headers.update({
+            'Referer': story.url,
+        })
+
    if story.summary:
        extra_metadata['Summary'] = story.summary
    if story.tags:
@@ -220,11 +229,12 @@ def generate_epub(story, cover_options={}, image_options={}, output_filename=Non
            *chapter_html(
                story,
                image_options=image_options,
-                normalize=normalize
+                normalize=normalize,
+                session=session
            ),
            EpubFile(
                path='Styles/base.css',
-                contents=requests.Session().get(
+                contents=session.get(
                    'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text,
                filetype='text/css'
            ),
--- a/ebook/image.py
+++ b/ebook/image.py
@@ -86,7 +86,8 @@ def get_image_from_url(
    image_format: str = "JPEG",
    compress_images: bool = False,
    max_image_size: int = 1_000_000,
-    always_convert: bool = False
+    always_convert: bool = False,
+    session: requests.Session = None
 ) -> Tuple[bytes, str, str]:
    """
    Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of
@@ -98,6 +99,8 @@ def get_image_from_url(
    @param max_image_size: The maximum size of the image in bytes
    @return: A tuple of the image data, the image format and the image mime type
    """
+
+    session = session or requests.Session()
    try:
        if url.startswith("https://www.filepicker.io/api/"):
            logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
@@ -123,7 +126,7 @@ def get_image_from_url(
            return imgdata, file_ext, f"image/{file_ext}"

        print(url)
-        img = requests.Session().get(url)
+        img = session.get(url)
        image = BytesIO(img.content)
        image.seek(0)

--- a/leech.py
+++ b/leech.py
@@ -48,7 +48,10 @@ def create_session(cache):
        pass
    session.cookies.update(lwp_cookiejar)
    session.headers.update({
-        'User-agent': USER_AGENT
+        'User-Agent': USER_AGENT,
+        'Accept-Language': 'en-US,en;q=0.5',
+        'Accept-Encoding': 'gzip, deflate',
+        'Accept': '*/*',  # this is essential for imgur
    })
    return session

@@ -181,7 +184,8 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, **other_
                },
                normalize=normalize,
                output_dir=output_dir or options.get('output_dir', os.getcwd()),
-                allow_spaces=options.get('allow_spaces', False)
+                allow_spaces=options.get('allow_spaces', False),
+                session=session
            )
            logger.info("File created: " + filename)
        else: