mirror of https://github.com/kemayo/leech
synced 2026-05-07 03:52:17 +02:00
Assorted tweaks to make type checking a bit happier
This commit is contained in:
parent 82cf246593
commit e93590e13c
4 changed files with 27 additions and 28 deletions
leech.py | 8

@@ -45,9 +45,9 @@ def configure_logging(verbose):
     )


-def create_session(cache):
+def create_session(cache) -> requests_cache.CachedSession | requests.Session:
     if cache:
-        session = requests_cache.CachedSession('leech', expire_after=4 * 3600, use_temp=True)
+        session = requests_cache.CachedSession('leech', expire_after=4 * 3600, use_temp=True, backend='sqlite')
         logger.debug("CachedSession at %s", session.cache.db_path)
     else:
         session = requests.Session()

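Note: the explicit backend='sqlite' is what makes the logged session.cache.db_path well-typed, since db_path only exists on requests-cache's SQLite backend, and the new union return type has to be narrowed by callers. A minimal standalone sketch of both points (simplified body, hypothetical usage):

    import requests
    import requests_cache

    def create_session(cache: bool) -> requests_cache.CachedSession | requests.Session:
        # Simplified version of the function above, for illustration only.
        if cache:
            return requests_cache.CachedSession(
                'leech', expire_after=4 * 3600, use_temp=True, backend='sqlite')
        return requests.Session()

    session = create_session(cache=True)
    if isinstance(session, requests_cache.CachedSession):
        # isinstance narrowing is what lets a type checker accept
        # cache-specific attributes such as session.cache.db_path.
        print('sqlite cache at', session.cache.db_path)
    session.get('https://example.com')  # valid on either branch of the union
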
@@ -59,7 +59,7 @@ def create_session(cache):
             logger.debug("No leech.cookies present in %s", directory)
             continue
         try:
-            lwp_cookiejar.load(directory / 'leech.cookies', ignore_discard=True)
+            lwp_cookiejar.load(str(directory / 'leech.cookies'), ignore_discard=True)
         except Exception:
             # This file is very much optional, so this log isn't really necessary
             logger.exception("Couldn't load cookies from leech.cookies in %s", dirs.user_data_path)

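Note: the str(...) wrapper exists because the standard library stubs have historically annotated FileCookieJar.load()'s filename parameter as a plain str, so passing a pathlib.Path upsets the checker even where it works at runtime. A standalone sketch (the cookie path is hypothetical):

    from http.cookiejar import LWPCookieJar
    from pathlib import Path

    jar = LWPCookieJar()
    cookie_file = Path.home() / 'leech.cookies'  # hypothetical location
    if cookie_file.exists():
        # str() matches the parameter type in the stubs.
        jar.load(str(cookie_file), ignore_discard=True)
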
@@ -200,7 +200,7 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, user_age
         options, login = create_options(site, site_options, other_flags)
         if UA := user_agent or options.get('user_agent'):
             logger.debug('USER_AGENT overridden to "%s"', UA)
-            session.headers.update( {'USER_AGENT': UA})
+            session.headers.update({'USER_AGENT': UA})
         site_output_dir = Path(output_dir or options.get('output_dir', os.getcwd())).expanduser().resolve()
         if not os.path.exists(site_output_dir):
             logger.warning("output directory doesn't exist: %s", site_output_dir)

sites/__init__.py

@@ -7,9 +7,10 @@ import uuid
 import datetime
 import time
 import logging
-import urllib
 import re
 import hashlib
+import requests
+from urllib import parse as urlparse
 from attrs import define, field, Factory
 from bs4 import BeautifulSoup

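Note: beyond satisfying the checker, `from urllib import parse as urlparse` is the safer spelling: `import urllib` alone does not import the parse submodule, so `urllib.parse.urlparse(...)` only works when some other module has already imported urllib.parse as a side effect. For example:

    from urllib import parse as urlparse

    # The submodule is now bound explicitly rather than by side effect.
    print(urlparse.urlparse('https://example.com/a/b?x=1').path)  # /a/b
    print(urlparse.urljoin('https://example.com/a/', 'b'))        # https://example.com/a/b
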
@@ -34,7 +35,7 @@ class Image:
         if self.url.startswith("data:image") and 'base64' in self.url:
             head, base64data = self.url.split(',')
             return str(head.split(';')[0].split('/')[1])
-        path = urllib.parse.urlparse(self.url).path
+        path = urlparse.urlparse(self.url).path
         return os.path.splitext(path)[1]

@@ -42,7 +43,7 @@ class Image:
 class Chapter:
     title: str
     contents: str
-    date: datetime.datetime = False
+    date: datetime.datetime | None = None
     images: dict = Factory(dict)

@@ -93,7 +94,7 @@ class Site:
     """A Site handles checking whether a URL might represent a site, and then
     extracting the content of a story from said site.
     """
-    session: object = field()
+    session: requests.Session = field()
     footnotes: list = field(factory=list, init=False)
     options: dict = Factory(
         lambda site: site.get_default_options(),

@@ -102,9 +103,7 @@ class Site:

     @classmethod
     def site_key(cls):
-        if hasattr(cls, '_key'):
-            return cls._key
-        return cls.__name__
+        return getattr(cls, '_key', cls.__name__)

     @staticmethod
     def get_site_specific_option_defs():

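Note: getattr with a default collapses the hasattr branch into a single expression and avoids the "attribute may be undefined" complaint. Behavior is unchanged for subclasses that set _key; a small sketch (the subclass name is hypothetical):

    class Site:
        @classmethod
        def site_key(cls):
            return getattr(cls, '_key', cls.__name__)

    class Example(Site):  # hypothetical subclass
        _key = 'ExampleForum'

    assert Site.site_key() == 'Site'
    assert Example.site_key() == 'ExampleForum'
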
@@ -176,8 +175,8 @@ class Site:
             options[option.name] = option_value
         return options

-    @staticmethod
-    def matches(url):
+    @classmethod
+    def matches(cls, url):
         raise NotImplementedError()

     def extract(self, url):

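Note: turning matches into a classmethod gives overrides access to cls, so per-site data such as a class-level URL pattern is reachable without hard-coding the class name. A sketch of what an override can now look like (the subclass and regex are hypothetical):

    import re

    class Site:
        @classmethod
        def matches(cls, url):
            raise NotImplementedError()

    class ExampleSite(Site):  # hypothetical subclass
        _url_re = re.compile(r'^https?://example\.com/story/\d+')

        @classmethod
        def matches(cls, url):
            # cls lets overrides reach class attributes like _url_re.
            match = cls._url_re.match(url)
            return match.group(0) if match else None

    print(ExampleSite.matches('https://example.com/story/42'))
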
@@ -196,7 +195,7 @@ class Site:
     def login(self, login_details):
         raise NotImplementedError()

-    def _soup(self, url, method=False, delay=0, retry=3, retry_delay=10, **kw):
+    def _soup(self, url, method=None, delay=0, retry=3, retry_delay=10, **kw) -> tuple[BeautifulSoup, str]:
         if not method:
             method = self.options.get('parser', 'lxml')
         if url.startswith('http://') or url.startswith('https://'):

@@ -221,7 +220,7 @@ class Site:
             text = url
             fallback_base = ''
         soup = BeautifulSoup(text, method)
-        return soup, (soup.head and soup.head.base) and soup.head.base.get('href') or fallback_base
+        return soup, str((soup.head and soup.head.base) and soup.head.base.get('href') or fallback_base)

     def _form_in_soup(self, soup):
         if soup.name == 'form':

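Note: the str(...) here pairs with the new tuple[BeautifulSoup, str] annotation on _soup: BeautifulSoup's Tag.get() is typed as possibly returning a list of strings or None, so without the wrapper the function could not promise a plain str. A standalone illustration:

    from bs4 import BeautifulSoup

    html = '<html><head><base href="https://example.com/"></head><body></body></html>'
    soup = BeautifulSoup(html, 'html.parser')
    base = (soup.head and soup.head.base) and soup.head.base.get('href') or ''
    # get() may be typed as str | list[str] | None; str() pins the type down.
    print(str(base))  # https://example.com/
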
@@ -265,7 +264,7 @@ class Site:
         return soup.new_tag(*args, **kw)

     def _join_url(self, *args, **kwargs):
-        return urllib.parse.urljoin(*args, **kwargs)
+        return urlparse.urljoin(*args, **kwargs)

     def _footnote(self, contents, chapterid):
         """Register a footnote and return a link to that footnote"""

@@ -302,7 +301,7 @@ class Site:

         return spoiler_link

-    def _clean(self, contents, base=False):
+    def _clean(self, contents, base:str|None=None):
         """Clean up story content to be more ebook-friendly

         TODO: this expects a soup as its argument, so the couple of API-driven sites can't use it as-is

@@ -339,7 +338,7 @@ class Site:
         # Call this on a story after it's fully extracted to clean up things
         for chapter in story:
             if hasattr(chapter, '__iter__'):
-                self._finalize(chapter, story)
+                self._finalize(chapter)
             else:
                 self._process_images(chapter)

@@ -380,9 +379,9 @@ class SiteSpecificOption:
     name: str
     flag_pattern: str
     type: object = None
-    default: bool = False
-    help: str = None
-    choices: tuple = None
+    default: object = False
+    help: str|None = None
+    choices: tuple|None = None
     exposed: bool = True
     click_kwargs: frozenset = field(converter=lambda kwargs: frozenset(kwargs.items()), default={})

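Note: `help: str = None` and `choices: tuple = None` were self-contradictory annotations (a None default on a non-optional type), which is exactly what strict checkers flag; the `str|None` / `tuple|None` forms make the default legal. A minimal attrs sketch of the corrected pattern (the class is a hypothetical analogue, not the real SiteSpecificOption):

    from attrs import define

    @define
    class Option:  # hypothetical minimal analogue
        name: str
        help: str | None = None      # default now agrees with the annotation
        choices: tuple | None = None

    Option(name='offset')  # optional fields can stay unset
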
sites/arbitrary.py

@@ -31,15 +31,15 @@ class SiteDefinition:
     author: str
     content_selector: str
     # If present, find something within `content` to use a chapter title; if not found, the link text to it will be used
-    content_title_selector: str = False
+    content_title_selector: str|None = None
     # If present, find a specific element in the `content` to be the chapter text
-    content_text_selector: str = False
+    content_text_selector: str|None = None
     # If present, it looks for chapters linked from `url`. If not, it assumes `url` points to a chapter.
-    chapter_selector: str = False
+    chapter_selector: str|None = None
     # If present, use to find a link to the next content page (only used if not using chapter_selector)
-    next_selector: str = False
+    next_selector: str|None = None
     # If present, use to filter out content that matches the selector
-    filter_selector: str = False
+    filter_selector: str|None = None
     cover_url: str = ''

@@ -110,7 +110,7 @@ class Arbitrary(Site):

         return story

-    def _chapter(self, url, definition, title=False):
+    def _chapter(self, url, definition, title=None):
         logger.info("Extracting chapter @ %s", url)
         soup, base = self._soup(url)

@@ -24,7 +24,7 @@ class XenForo2(XenForo):
             tags=tags
         )

-    def _posts_from_page(self, soup, postid=False):
+    def _posts_from_page(self, soup, postid=None):
         if postid:
             return soup.find('article', id='js-post-' + postid)
         return soup.select('article.message--post')