1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2025-12-31 04:33:58 +01:00

Merge pull request #13 from AlexRaubach/master

Add The Gods Are Bastards, Worm and an array bounds check
This commit is contained in:
David Lynch 2018-08-29 08:38:18 -05:00 committed by GitHub
commit 123eb69821
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 0 deletions

View file

@@ -0,0 +1,8 @@
{
"url": "https://tiraas.wordpress.com/table-of-contents/",
"title": "The Gods Are Bastards",
"author": "D. D. Webb",
"chapter_selector": "article .entry-content a[href*='20']",
"content_selector": "article .entry-content",
"filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']"
}

8
examples/worm.json Normal file
View file

@@ -0,0 +1,8 @@
{
"url": "https://parahumans.wordpress.com/table-of-contents/",
"title": "Worm",
"author": "Wildbow",
"chapter_selector": "#main .entry-content a",
"content_selector": "#main .entry-content",
"filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']"
}

View file

@@ -80,6 +80,10 @@ class Arbitrary(Site):
# Probably by changing it so that this returns a Chapter / Section.
logger.info("Extracting chapter @ %s", url)
soup = self._soup(url)
if not soup.select(definition.content_selector):
return ''
content = soup.select(definition.content_selector)[0]
if definition.filter_selector: