mirror of
https://github.com/kemayo/leech
synced 2025-12-06 16:33:16 +01:00
Remove arbitrary's special-case image loading, since the default works
This commit is contained in:
parent
21834bb5ed
commit
9510a22cb0
3 changed files with 4 additions and 38 deletions
|
|
@@ -6,6 +6,5 @@
|
|||
"content_title_selector": "h1.entry-title",
|
||||
"content_text_selector": ".entry-content",
|
||||
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
|
||||
"next_selector": "a[rel=\"next\"]",
|
||||
"image_selector": ".entry-content img"
|
||||
"next_selector": "a[rel=\"next\"]"
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -6,6 +6,5 @@
|
|||
"content_title_selector": "h1.entry-title",
|
||||
"content_text_selector": ".entry-content",
|
||||
"chapter_selector": "article .entry-content > p a",
|
||||
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
|
||||
"image_selector": ".entry-content img"
|
||||
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']"
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -6,8 +6,7 @@ import datetime
|
|||
import json
|
||||
import re
|
||||
import os.path
|
||||
import urllib
|
||||
from . import register, Site, Section, Chapter, Image
|
||||
from . import register, Site, Section, Chapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@@ -134,42 +133,11 @@ class Arbitrary(Site):
|
|||
|
||||
self._clean(content, base)
|
||||
|
||||
images = []
|
||||
if definition.image_selector:
|
||||
images = self.load_images(content, definition.image_selector)
|
||||
|
||||
chapters.append(Chapter(
|
||||
title=title,
|
||||
contents=content.prettify(),
|
||||
# TODO: better date detection
|
||||
date=datetime.datetime.now(),
|
||||
images=images
|
||||
date=datetime.datetime.now()
|
||||
))
|
||||
|
||||
return chapters
|
||||
|
||||
def load_images(self, content, selector):
|
||||
images = []
|
||||
for image in content.select(selector):
|
||||
if not image.has_attr('src'):
|
||||
continue
|
||||
|
||||
image_url = image['src']
|
||||
url = urllib.parse.urlparse(image_url)
|
||||
local_path = 'chapter_images/' + url.path.strip('/')
|
||||
|
||||
image_res = self.session.get(image_url)
|
||||
content_type = image_res.headers['Content-Type']
|
||||
image_data = image_res.content
|
||||
|
||||
images.append(Image(
|
||||
path=local_path,
|
||||
contents=image_data,
|
||||
content_type=content_type
|
||||
))
|
||||
# Replace 'src'.
|
||||
image['src'] = '../' + local_path
|
||||
if image.has_attr('srcset'):
|
||||
del image['srcset']
|
||||
|
||||
return images
|
||||
|
|
|
|||
Loading…
Reference in a new issue