Remove arbitrary's special-case image loading, since the default works

2025-12-06 16:33:16 +01:00 · 2024-11-23 15:40:47 -06:00 · 2024-11-23 15:40:47 -06:00 · 9510a22cb0
commit 9510a22cb0
parent 21834bb5ed
3 changed files with 4 additions and 38 deletions
--- a/examples/pale-withextras.json
+++ b/examples/pale-withextras.json
@@ -6,6 +6,5 @@
    "content_title_selector": "h1.entry-title",
    "content_text_selector": ".entry-content",
    "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
-    "next_selector": "a[rel=\"next\"]",
-    "image_selector": ".entry-content img"
+    "next_selector": "a[rel=\"next\"]"
 }
--- a/examples/pale.json
+++ b/examples/pale.json
@@ -6,6 +6,5 @@
    "content_title_selector": "h1.entry-title",
    "content_text_selector": ".entry-content",
    "chapter_selector": "article .entry-content > p a",
-    "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
-    "image_selector": ".entry-content img"
+    "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']"
 }
--- a/sites/arbitrary.py
+++ b/sites/arbitrary.py
@@ -6,8 +6,7 @@ import datetime
 import json
 import re
 import os.path
-import urllib
-from . import register, Site, Section, Chapter, Image
+from . import register, Site, Section, Chapter

 logger = logging.getLogger(__name__)

@@ -134,42 +133,11 @@ class Arbitrary(Site):

            self._clean(content, base)

-            images = []
-            if definition.image_selector:
-                images = self.load_images(content, definition.image_selector)
-
            chapters.append(Chapter(
                title=title,
                contents=content.prettify(),
                # TODO: better date detection
-                date=datetime.datetime.now(),
-                images=images
+                date=datetime.datetime.now()
            ))

        return chapters
-
-    def load_images(self, content, selector):
-        images = []
-        for image in content.select(selector):
-            if not image.has_attr('src'):
-                continue
-
-            image_url = image['src']
-            url = urllib.parse.urlparse(image_url)
-            local_path = 'chapter_images/' + url.path.strip('/')
-
-            image_res = self.session.get(image_url)
-            content_type = image_res.headers['Content-Type']
-            image_data = image_res.content
-
-            images.append(Image(
-                path=local_path,
-                contents=image_data,
-                content_type=content_type
-            ))
-            # Replace 'src'.
-            image['src'] = '../' + local_path
-            if image.has_attr('srcset'):
-                del image['srcset']
-
-        return images