mirror of
https://github.com/kemayo/leech
synced 2025-12-06 16:33:16 +01:00
Made arbitrary sites no longer leak memory and fixed worm epub.
Each `Chapter` object held a reference to the entire page tree, so the program's RAM usage grew substantially with every chapter. Converted Worm to use `next_selector` so that the chapters are correctly ordered, E.2 is not skipped, and the download no longer crashes on the `?share=twitter` URL that was previously matched. Fixed Worm titles.
This commit is contained in:
parent
377e6ff76c
commit
1afac50437
2 changed files with 8 additions and 3 deletions
|
|
@ -1,9 +1,11 @@
|
||||||
{
|
{
|
||||||
"url": "https://parahumans.wordpress.com/table-of-contents/",
|
"url": "https://parahumans.wordpress.com/2011/06/11/1-1/",
|
||||||
"title": "Worm",
|
"title": "Worm",
|
||||||
"author": "Wildbow",
|
"author": "Wildbow",
|
||||||
"chapter_selector": "#main .entry-content a",
|
"content_selector": "#main",
|
||||||
"content_selector": "#main .entry-content",
|
"content_title_selector": "h1.entry-title",
|
||||||
|
"content_text_selector": ".entry-content",
|
||||||
"filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']",
|
"filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']",
|
||||||
|
"next_selector": "a[rel=\"next\"]",
|
||||||
"cover_url": "https://pre00.deviantart.net/969a/th/pre/i/2015/051/8/7/worm_cover_by_cactusfantastico-d8ivj4b.png"
|
"cover_url": "https://pre00.deviantart.net/969a/th/pre/i/2015/051/8/7/worm_cover_by_cactusfantastico-d8ivj4b.png"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -126,6 +126,9 @@ class Arbitrary(Site):
|
||||||
|
|
||||||
# TODO: consider `'\n'.join(map(str, content.contents))`
|
# TODO: consider `'\n'.join(map(str, content.contents))`
|
||||||
content.name = 'div'
|
content.name = 'div'
|
||||||
|
|
||||||
|
# Detach the content from the bs4 tree so the Chapter does not keep the rest of the page tree alive.
|
||||||
|
content = content.extract()
|
||||||
|
|
||||||
chapters.append(Chapter(
|
chapters.append(Chapter(
|
||||||
title=title,
|
title=title,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue