mirror of
https://github.com/kemayo/leech
synced 2025-12-31 04:33:58 +01:00
Merge pull request #13 from AlexRaubach/master
Add The Gods Are Bastards, Worm and an array bounds check
This commit is contained in:
commit
123eb69821
3 changed files with 20 additions and 0 deletions
8
examples/thegodsarebastards.json
Normal file
8
examples/thegodsarebastards.json
Normal file
|
|
@@ -0,0 +1,8 @@
|
|||
{
|
||||
"url": "https://tiraas.wordpress.com/table-of-contents/",
|
||||
"title": "The Gods Are Bastards",
|
||||
"author": "D. D. Webb",
|
||||
"chapter_selector": "article .entry-content a[href*='20']",
|
||||
"content_selector": "article .entry-content",
|
||||
"filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']"
|
||||
}
|
||||
8
examples/worm.json
Normal file
8
examples/worm.json
Normal file
|
|
@@ -0,0 +1,8 @@
|
|||
{
|
||||
"url": "https://parahumans.wordpress.com/table-of-contents/",
|
||||
"title": "Worm",
|
||||
"author": "Wildbow",
|
||||
"chapter_selector": "#main .entry-content a",
|
||||
"content_selector": "#main .entry-content",
|
||||
"filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']"
|
||||
}
|
||||
|
|
@@ -80,6 +80,10 @@ class Arbitrary(Site):
|
|||
# Probably by changing it so that this returns a Chapter / Section.
|
||||
logger.info("Extracting chapter @ %s", url)
|
||||
soup = self._soup(url)
|
||||
|
||||
if not soup.select(definition.content_selector):
|
||||
return ''
|
||||
|
||||
content = soup.select(definition.content_selector)[0]
|
||||
|
||||
if definition.filter_selector:
|
||||
|
|
|
|||
Loading…
Reference in a new issue