mirror of
https://github.com/kemayo/leech
synced 2025-12-08 01:14:10 +01:00
Remove arbitrary's special-case image loading, since the default works
This commit is contained in:
parent
21834bb5ed
commit
9510a22cb0
3 changed files with 4 additions and 38 deletions
|
|
@@ -6,6 +6,5 @@
|
||||||
"content_title_selector": "h1.entry-title",
|
"content_title_selector": "h1.entry-title",
|
||||||
"content_text_selector": ".entry-content",
|
"content_text_selector": ".entry-content",
|
||||||
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
|
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
|
||||||
"next_selector": "a[rel=\"next\"]",
|
"next_selector": "a[rel=\"next\"]"
|
||||||
"image_selector": ".entry-content img"
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -6,6 +6,5 @@
|
||||||
"content_title_selector": "h1.entry-title",
|
"content_title_selector": "h1.entry-title",
|
||||||
"content_text_selector": ".entry-content",
|
"content_text_selector": ".entry-content",
|
||||||
"chapter_selector": "article .entry-content > p a",
|
"chapter_selector": "article .entry-content > p a",
|
||||||
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
|
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']"
|
||||||
"image_selector": ".entry-content img"
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -6,8 +6,7 @@ import datetime
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import os.path
|
import os.path
|
||||||
import urllib
|
from . import register, Site, Section, Chapter
|
||||||
from . import register, Site, Section, Chapter, Image
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@@ -134,42 +133,11 @@ class Arbitrary(Site):
|
||||||
|
|
||||||
self._clean(content, base)
|
self._clean(content, base)
|
||||||
|
|
||||||
images = []
|
|
||||||
if definition.image_selector:
|
|
||||||
images = self.load_images(content, definition.image_selector)
|
|
||||||
|
|
||||||
chapters.append(Chapter(
|
chapters.append(Chapter(
|
||||||
title=title,
|
title=title,
|
||||||
contents=content.prettify(),
|
contents=content.prettify(),
|
||||||
# TODO: better date detection
|
# TODO: better date detection
|
||||||
date=datetime.datetime.now(),
|
date=datetime.datetime.now()
|
||||||
images=images
|
|
||||||
))
|
))
|
||||||
|
|
||||||
return chapters
|
return chapters
|
||||||
|
|
||||||
def load_images(self, content, selector):
|
|
||||||
images = []
|
|
||||||
for image in content.select(selector):
|
|
||||||
if not image.has_attr('src'):
|
|
||||||
continue
|
|
||||||
|
|
||||||
image_url = image['src']
|
|
||||||
url = urllib.parse.urlparse(image_url)
|
|
||||||
local_path = 'chapter_images/' + url.path.strip('/')
|
|
||||||
|
|
||||||
image_res = self.session.get(image_url)
|
|
||||||
content_type = image_res.headers['Content-Type']
|
|
||||||
image_data = image_res.content
|
|
||||||
|
|
||||||
images.append(Image(
|
|
||||||
path=local_path,
|
|
||||||
contents=image_data,
|
|
||||||
content_type=content_type
|
|
||||||
))
|
|
||||||
# Replace 'src'.
|
|
||||||
image['src'] = '../' + local_path
|
|
||||||
if image.has_attr('srcset'):
|
|
||||||
del image['srcset']
|
|
||||||
|
|
||||||
return images
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue