From 3fdbae58516f1fa92d0dfe7028eba3631c5a128b Mon Sep 17 00:00:00 2001
From: David Lynch
Date: Tue, 17 Dec 2024 14:41:33 -0600
Subject: [PATCH] Pass through some more headers in the session

---
 ebook/__init__.py | 24 +++++++++++++++++-------
 ebook/image.py    |  7 +++++--
 leech.py          |  8 ++++++--
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/ebook/__init__.py b/ebook/__init__.py
index 077b5b6..f667da1 100644
--- a/ebook/__init__.py
+++ b/ebook/__init__.py
@@ -7,7 +7,6 @@ from bs4 import BeautifulSoup
 import html
 import unicodedata
 import datetime
-import requests
 from attrs import define, asdict
 
 html_template = '''
@@ -91,7 +90,8 @@ def chapter_html(
     story,
     image_options,
     titleprefix=None,
-    normalize=False
+    normalize=False,
+    session=None
 ):
     already_fetched_images = {}
     chapters = []
@@ -100,7 +100,7 @@ def chapter_html(
         if hasattr(chapter, '__iter__'):
             # This is a Section
             chapters.extend(chapter_html(
-                chapter, image_options=image_options, titleprefix=title, normalize=normalize
+                chapter, image_options=image_options, titleprefix=title, normalize=normalize, session=session
             ))
         else:
             soup = BeautifulSoup(chapter.contents, 'html5lib')
@@ -118,7 +118,8 @@ def chapter_html(
                         image_format=image_options.get('image_format'),
                         compress_images=image_options.get('compress_images'),
                         max_image_size=image_options.get('max_image_size'),
-                        always_convert=image_options.get('always_convert_images')
+                        always_convert=image_options.get('always_convert_images'),
+                        session=session
                     )
                     chapter.images.append(Image(
                         path=f"images/ch{i}_leechimage_{count}.{img_contents[1]}",
@@ -169,7 +170,7 @@ def chapter_html(
     return chapters
 
 
-def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False):
+def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False, session=None):
     dates = list(story.dates())
     metadata = {
         'title': story.title,
@@ -181,6 +182,14 @@ def generate_epub(story, cover_options={}, image_options={}, output_filename=Non
     }
     extra_metadata = {}
 
+    session.headers.update({
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0',
+    })
+    if story.url:
+        session.headers.update({
+            'Referer': story.url,
+        })
+
     if story.summary:
         extra_metadata['Summary'] = story.summary
     if story.tags:
@@ -220,11 +229,12 @@ def generate_epub(story, cover_options={}, image_options={}, output_filename=Non
         *chapter_html(
             story,
             image_options=image_options,
-            normalize=normalize
+            normalize=normalize,
+            session=session
         ),
         EpubFile(
             path='Styles/base.css',
-            contents=requests.Session().get(
+            contents=session.get(
                 'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text,
             filetype='text/css'
         ),
diff --git a/ebook/image.py b/ebook/image.py
index 1cd024a..ee1e1eb 100644
--- a/ebook/image.py
+++ b/ebook/image.py
@@ -86,7 +86,8 @@ def get_image_from_url(
     image_format: str = "JPEG",
     compress_images: bool = False,
     max_image_size: int = 1_000_000,
-    always_convert: bool = False
+    always_convert: bool = False,
+    session: requests.Session = None
 ) -> Tuple[bytes, str, str]:
     """
     Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of
@@ -98,6 +99,8 @@ def get_image_from_url(
     @param max_image_size: The maximum size of the image in bytes
     @return: A tuple of the image data, the image format and the image mime type
     """
+
+    session = session or requests.Session()
     try:
         if url.startswith("https://www.filepicker.io/api/"):
             logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
@@ -123,7 +126,7 @@ def get_image_from_url(
             return imgdata, file_ext, f"image/{file_ext}"
 
     print(url)
-    img = requests.Session().get(url)
+    img = session.get(url)
 
     image = BytesIO(img.content)
     image.seek(0)
diff --git a/leech.py b/leech.py
index 3b11abf..35c7e1d 100755
--- a/leech.py
+++ b/leech.py
@@ -48,7 +48,10 @@ def create_session(cache):
         pass
     session.cookies.update(lwp_cookiejar)
     session.headers.update({
-        'User-agent': USER_AGENT
+        'User-Agent': USER_AGENT,
+        'Accept-Language': 'en-US,en;q=0.5',
+        'Accept-Encoding': 'gzip, deflate',
+        'Accept': '*/*',  # this is essential for imgur
     })
     return session
 
@@ -181,7 +184,8 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, **other_
                 },
                 normalize=normalize,
                 output_dir=output_dir or options.get('output_dir', os.getcwd()),
-                allow_spaces=options.get('allow_spaces', False)
+                allow_spaces=options.get('allow_spaces', False),
+                session=session
            )
             logger.info("File created: " + filename)
         else:
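
A minimal usage sketch of the new pass-through (not part of the patch; the URL and header values below are illustrative): leech.py builds one requests.Session in create_session(), generate_epub() adds a browser User-Agent plus a Referer taken from story.url, and that same session is then reused for the base.css download and for every get_image_from_url() call instead of a throwaway requests.Session() per fetch.

    import requests

    from ebook.image import get_image_from_url

    session = requests.Session()
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 ...',  # placeholder UA string
        'Accept': '*/*',                  # the patch notes this is essential for imgur
    })

    # session= is optional; get_image_from_url() falls back to a fresh
    # requests.Session() when no session is supplied.
    data, ext, mimetype = get_image_from_url(
        'https://i.imgur.com/example.jpg',  # hypothetical image URL
        session=session,
    )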