Mirror of https://github.com/kemayo/leech

Pass through some more headers in the session

commit 3fdbae5851 (parent 204807add6)
3 changed files with 28 additions and 11 deletions
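This commit threads a single requests.Session from create_session() in leech.py down through generate_epub(), chapter_html(), and get_image_from_url(), so headers and cookies configured once apply to every fetch. A minimal sketch of the requests behaviour this relies on (the header value is a placeholder):

import requests

# Headers set on a Session persist for its lifetime and are sent with
# every request made through it, which is what makes handing one shared
# session down the call chain worthwhile.
session = requests.Session()
session.headers.update({'User-Agent': 'example-agent/1.0'})
print(session.headers['User-Agent'])  # -> example-agent/1.0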
@@ -7,7 +7,6 @@ from bs4 import BeautifulSoup
 import html
 import unicodedata
 import datetime
-import requests
 from attrs import define, asdict

 html_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
@@ -91,7 +90,8 @@ def chapter_html(
         story,
         image_options,
         titleprefix=None,
-        normalize=False
+        normalize=False,
+        session=None
 ):
     already_fetched_images = {}
     chapters = []
@@ -100,7 +100,7 @@ def chapter_html(
         if hasattr(chapter, '__iter__'):
             # This is a Section
             chapters.extend(chapter_html(
-                chapter, image_options=image_options, titleprefix=title, normalize=normalize
+                chapter, image_options=image_options, titleprefix=title, normalize=normalize, session=session
             ))
         else:
             soup = BeautifulSoup(chapter.contents, 'html5lib')
@@ -118,7 +118,8 @@ def chapter_html(
                 image_format=image_options.get('image_format'),
                 compress_images=image_options.get('compress_images'),
                 max_image_size=image_options.get('max_image_size'),
-                always_convert=image_options.get('always_convert_images')
+                always_convert=image_options.get('always_convert_images'),
+                session=session
             )
             chapter.images.append(Image(
                 path=f"images/ch{i}_leechimage_{count}.{img_contents[1]}",
@@ -169,7 +170,7 @@ def chapter_html(
     return chapters


-def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False):
+def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False, session=None):
     dates = list(story.dates())
     metadata = {
         'title': story.title,
@@ -181,6 +182,14 @@ def generate_epub(story, cover_options={}, image_options={}, output_filename=Non
     }
     extra_metadata = {}

+    session.headers.update({
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0',
+    })
+    if story.url:
+        session.headers.update({
+            'Referer': story.url,
+        })
+
     if story.summary:
         extra_metadata['Summary'] = story.summary
     if story.tags:
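The new headers are set on the shared session, so they are sticky: the Referer set once here accompanies every later request made through the same session, including the image fetches in get_image_from_url(). An illustrative sketch (URLs are placeholders):

import requests

session = requests.Session()
session.headers.update({'Referer': 'https://example.com/story'})

# prepare_request() merges session-level headers into the outgoing request.
prepared = session.prepare_request(requests.Request('GET', 'https://example.com/img.png'))
print(prepared.headers['Referer'])  # -> https://example.com/story

Note that generate_epub() gives the new session parameter a default of None but dereferences it unconditionally in this hunk, so callers are effectively required to pass a session, as the leech.py hunk below does.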
@@ -220,11 +229,12 @@ def generate_epub(story, cover_options={}, image_options={}, output_filename=Non
         *chapter_html(
             story,
             image_options=image_options,
-            normalize=normalize
+            normalize=normalize,
+            session=session
         ),
         EpubFile(
             path='Styles/base.css',
-            contents=requests.Session().get(
+            contents=session.get(
                 'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text,
             filetype='text/css'
         ),

@@ -86,7 +86,8 @@ def get_image_from_url(
         image_format: str = "JPEG",
         compress_images: bool = False,
         max_image_size: int = 1_000_000,
-        always_convert: bool = False
+        always_convert: bool = False,
+        session: requests.Session = None
 ) -> Tuple[bytes, str, str]:
     """
     Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of
@@ -98,6 +99,8 @@ def get_image_from_url(
     @param max_image_size: The maximum size of the image in bytes
     @return: A tuple of the image data, the image format and the image mime type
     """
+
+    session = session or requests.Session()
     try:
         if url.startswith("https://www.filepicker.io/api/"):
            logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
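The session or requests.Session() guard keeps get_image_from_url() usable when no session is supplied. A minimal sketch of the fallback pattern (the fetch() name is illustrative):

import requests

def fetch(url, session=None):
    # Reuse the caller's session when given (pooled connections, shared
    # cookies and headers); otherwise fall back to a fresh, throwaway
    # Session so the function still works standalone.
    session = session or requests.Session()
    return session.get(url)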
@@ -123,7 +126,7 @@
         return imgdata, file_ext, f"image/{file_ext}"

     print(url)
-    img = requests.Session().get(url)
+    img = session.get(url)
     image = BytesIO(img.content)
     image.seek(0)

leech.py (8 changes)
@@ -48,7 +48,10 @@ def create_session(cache):
         pass
     session.cookies.update(lwp_cookiejar)
     session.headers.update({
-        'User-agent': USER_AGENT
+        'User-Agent': USER_AGENT,
+        'Accept-Language': 'en-US,en;q=0.5',
+        'Accept-Encoding': 'gzip, deflate',
+        'Accept': '*/*',  # this is essential for imgur
     })
     return session

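These defaults make every leech request look like an ordinary browser request; per the inline comment, the Accept header in particular is required by imgur. A sketch of how session-level headers reach each outgoing request (the User-Agent value is a stand-in for leech's USER_AGENT constant):

import requests

session = requests.Session()
session.headers.update({
    'User-Agent': 'leech/x.y',  # stand-in for leech's USER_AGENT constant
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'Accept': '*/*',
})

# prepare_request() merges the session headers into the request, so these
# defaults accompany every fetch without being repeated per call.
prepared = session.prepare_request(requests.Request('GET', 'https://example.com'))
print(prepared.headers['Accept'])  # -> */*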
@@ -181,7 +184,8 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, **other_
             },
             normalize=normalize,
             output_dir=output_dir or options.get('output_dir', os.getcwd()),
-            allow_spaces=options.get('allow_spaces', False)
+            allow_spaces=options.get('allow_spaces', False),
+            session=session
         )
         logger.info("File created: " + filename)
     else:
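Taken together, the plumbing looks like the toy sketch below; these stubs are hypothetical stand-ins for leech's real functions and only demonstrate how one session object reaches every level of the chain:

import requests

def create_session():
    session = requests.Session()
    session.headers.update({'Accept': '*/*'})
    return session

def get_image_from_url(url, session=None):
    session = session or requests.Session()
    return session  # stands in for the actual image fetch

def chapter_html(urls, session=None):
    return [get_image_from_url(url, session=session) for url in urls]

def generate_epub(urls, session=None):
    return chapter_html(urls, session=session)

shared = create_session()
# Every level received the same object, so its headers apply to all fetches.
assert all(s is shared for s in generate_epub(['u1', 'u2'], session=shared))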