mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-07 12:02:25 +02:00
Refactor use_pagecache into an INI setting and a sharable, thread safe cache impl.
This commit is contained in:
parent
ddf82749af
commit
8ba5d2c423
52 changed files with 164 additions and 331 deletions
|
|
@ -145,7 +145,7 @@ include_tocpage: true
|
|||
|
||||
## entries to make epub subjects and calibre tags
|
||||
## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d"
|
||||
include_subject_tags: extratags, genre, category, characters, ships, status
|
||||
include_subject_tags: extratags, genre, category, characters, ships, status
|
||||
|
||||
## extra tags (comma separated) to include, primarily for epub.
|
||||
extratags: FanFiction
|
||||
|
|
@ -547,6 +547,12 @@ normalize_text_links:true
|
|||
## normalize_text_links will improve URL matching considerably.
|
||||
internalize_text_links:true
|
||||
|
||||
## Of the ~140 supported sites, only ~50 have been checked to work
|
||||
## correctly with a page cache. The page cache is used to save already
|
||||
## downloaded pages which can be requested more than once, especially in
|
||||
## the Calibre plugin.
|
||||
use_pagecache:false
|
||||
|
||||
[base_efiction]
|
||||
|
||||
## At the time of writing, eFiction Base adapters allow downloading
|
||||
|
|
@ -561,6 +567,7 @@ storynotes_label:Story Notes
|
|||
add_to_extra_titlepage_entries:,storynotes
|
||||
|
||||
[base_xenforoforum]
|
||||
use_pagecache:true
|
||||
## Some sites require login for some stories
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
|
@ -1119,7 +1126,7 @@ windows_eol: true
|
|||
## URLs like: http://test1.com?sid=12345
|
||||
|
||||
[test1.com]
|
||||
|
||||
use_pagecache:true
|
||||
extratags: FanFiction,Testing
|
||||
|
||||
# extracategories:Fafner
|
||||
|
|
@ -1237,6 +1244,7 @@ website_encodings:Windows-1252,utf8,iso-8859-1
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[archiveofourown.org]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1404,6 +1412,7 @@ extraships:Severus Snape/Hermione Granger
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[bloodshedverse.com]
|
||||
use_pagecache:true
|
||||
## website encoding(s) In theory, each website reports the character
|
||||
## encoding they use for each page. In practice, some sites report it
|
||||
## incorrectly. Each adapter has a default list, usually "utf8,
|
||||
|
|
@ -1480,7 +1489,11 @@ cover_exclusion_regexp:/images/.*?ribbon.gif
|
|||
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[chireads.com]
|
||||
use_pagecache:true
|
||||
|
||||
[chosentwofanfic.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:disclaimer
|
||||
disclaimer_label: Disclaimer
|
||||
add_to_titlepage_entries:,disclaimer
|
||||
|
|
@ -1592,6 +1605,7 @@ extracategories:Harry Potter
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[fanfic.tenhawkpresents.ink]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1602,6 +1616,7 @@ website_encodings:Windows-1252,utf8
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[fanficauthors.net]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1610,6 +1625,7 @@ website_encodings:Windows-1252,utf8
|
|||
#password:yourpassword
|
||||
|
||||
[fanfics.me]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1637,6 +1653,7 @@ make_linkhtml_entries:translators,betas
|
|||
include_in_category:fandoms
|
||||
|
||||
[fanfictalk.com]
|
||||
use_pagecache:true
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. In commandline version, this should go in your
|
||||
## personal.ini, not defaults.ini.
|
||||
|
|
@ -1705,6 +1722,9 @@ add_to_keep_html_attrs:,style
|
|||
[fanfiction-junkies.de]
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[fastnovel.net]
|
||||
use_pagecache:true
|
||||
|
||||
[fiction.live]
|
||||
## Recommended if you include images, fiction.live tends to have many
|
||||
## duplicated images.
|
||||
|
|
@ -1755,6 +1775,7 @@ add_to_output_css:
|
|||
}
|
||||
|
||||
[fictionhunt.com]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1777,6 +1798,7 @@ make_linkhtml_entries:origin
|
|||
add_to_extra_titlepage_entries:originHTML
|
||||
|
||||
[fictionmania.tv]
|
||||
use_pagecache:true
|
||||
website_encodings:ISO-8859-1,auto
|
||||
|
||||
## Extra metadata that this adapter knows about. See [archiveofourown.org]
|
||||
|
|
@ -1817,6 +1839,7 @@ likes_label:Likes
|
|||
dislikes_label:Dislikes
|
||||
|
||||
[ficwad.com]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1916,6 +1939,7 @@ reader_posts_per_page:30
|
|||
#password:yourpassword
|
||||
|
||||
[harrypotterfanfiction.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:reviews,era
|
||||
|
||||
## Site dedicated to these categories/characters/ships
|
||||
|
|
@ -1949,6 +1973,7 @@ website_encodings:Windows-1252,utf8
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[inkbunny.net]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1984,10 +2009,12 @@ extra_titlepage_entries:universe,crossoverfandom
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[lcfanfic.com]
|
||||
use_pagecache:true
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Lois & Clark: The New Adventures of Superman
|
||||
|
||||
[literotica.com]
|
||||
use_pagecache:true
|
||||
user_agent:
|
||||
extra_valid_entries:eroticatags,averrating
|
||||
eroticatags_label:Erotica Tags
|
||||
|
|
@ -2072,6 +2099,7 @@ extracharacters:Carol,Daryl
|
|||
extraships:Carol/Daryl
|
||||
|
||||
[novelonlinefull.com]
|
||||
use_pagecache:true
|
||||
website_encodings: utf8:ignore, Windows-1252, iso-8859-1
|
||||
|
||||
## Clear FanFiction from defaults, site is original fiction.
|
||||
|
|
@ -2124,6 +2152,7 @@ extracategories:The Pretender
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[quotev.com]
|
||||
use_pagecache:true
|
||||
user_agent:Mozilla/5.0
|
||||
slow_down_sleep_time:2
|
||||
extra_valid_entries:pages,readers,reads,favorites,searchtags,comments
|
||||
|
|
@ -2415,6 +2444,7 @@ slow_down_sleep_time:2
|
|||
#password:yourpassword
|
||||
|
||||
[storiesonline.net]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -2545,6 +2575,7 @@ readings_label: Readings
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[trekfanfiction.net]
|
||||
use_pagecache:true
|
||||
website_encodings:utf8,Windows-1252,iso-8859-1
|
||||
|
||||
[trekiverse.org]
|
||||
|
|
@ -2582,6 +2613,7 @@ reviews_label:Reviews
|
|||
readings_label:Readings
|
||||
|
||||
[wattpad.com]
|
||||
use_pagecache:true
|
||||
#is_adult:true
|
||||
extra_titlepage_entries: language, reads
|
||||
extra_valid_entries: language, tags, reads
|
||||
|
|
@ -2595,7 +2627,11 @@ add_to_comma_entries:,reads
|
|||
## are reports that sound like site throttling without it.
|
||||
slow_down_sleep_time:2
|
||||
|
||||
[wuxiaworld.site]
|
||||
use_pagecache:true
|
||||
|
||||
[www.adastrafanfic.com]
|
||||
use_pagecache:true
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
|
|
@ -2644,6 +2680,7 @@ website_encodings:Windows-1252,utf8
|
|||
strip_text_links:true
|
||||
|
||||
[www.asianfanfics.com]
|
||||
use_pagecache:true
|
||||
## Unlike most sites, asianfanfics.com, instead of denying access to
|
||||
## 'adult' or subscriber-only content, will censor the text of stories
|
||||
## to remove 'adult' words or entire portions of the text. This is why
|
||||
|
|
@ -2684,6 +2721,7 @@ include_in_category:tags
|
|||
auto_sub:false
|
||||
|
||||
[www.bdsmlibrary.com]
|
||||
use_pagecache:true
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. Uncomment by removing '#' in front of is_adult.
|
||||
#is_adult:true
|
||||
|
|
@ -2764,6 +2802,7 @@ type_label:Type of Couple
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.fanfiction.net]
|
||||
use_pagecache:true
|
||||
## Using cloudscraper can satisfy the first couple levels of
|
||||
## Cloudflare bot-proofing, but not all levels. Older versions of
|
||||
## OpenSSL will also raise problems, so versions of Calibre older than
|
||||
|
|
@ -2828,6 +2867,7 @@ dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
|||
check_next_chapter:false
|
||||
|
||||
[www.fanfiktion.de]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -2840,6 +2880,7 @@ check_next_chapter:false
|
|||
extra_valid_entries:native_status
|
||||
|
||||
[www.ficbook.net]
|
||||
use_pagecache:true
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
|
|
@ -2911,6 +2952,7 @@ datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
|
|||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
[www.fimfiction.net]
|
||||
use_pagecache:true
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
|
|
@ -3017,6 +3059,7 @@ add_to_titlepage_entries:,growth, shrink, sizeroles
|
|||
#password:yourpassword
|
||||
|
||||
[www.hentai-foundry.com]
|
||||
use_pagecache:true
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
|
|
@ -3103,6 +3146,7 @@ extracategories:Lord of the Rings
|
|||
#password:yourpassword
|
||||
|
||||
[www.lotrgfic.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:places, times
|
||||
places_label: Places
|
||||
times_label:Times
|
||||
|
|
@ -3130,6 +3174,7 @@ eroticatags_label:Erotica Tags
|
|||
extra_titlepage_entries:eroticatags
|
||||
|
||||
[www.masseffect2.in]
|
||||
use_pagecache:true
|
||||
## Site dedicated to this fandom.
|
||||
extracategories: Mass Effect
|
||||
|
||||
|
|
@ -3156,6 +3201,9 @@ adult_ratings: E,R
|
|||
website_encodings:utf8
|
||||
|
||||
[www.mediaminer.org]
|
||||
## Using pagecache with mediaminer.org caused SSL errors in Calibre.
|
||||
## I've no idea why, but turning off explicitly.
|
||||
use_pagecache:false
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
## Note that mediaminer doesn't give datePublished on the story's
|
||||
## index page--it's collected from the earliest uploaded chapter. So
|
||||
|
|
@ -3221,6 +3269,7 @@ extracategories:Naruto
|
|||
extracategories:NCIS
|
||||
|
||||
[www.novelall.com]
|
||||
use_pagecache:true
|
||||
website_encodings: utf8:ignore, Windows-1252, iso-8859-1
|
||||
|
||||
## Clear FanFiction from defaults, site is original fiction.
|
||||
|
|
@ -3306,6 +3355,7 @@ extracategories:Queer as Folk
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.royalroad.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:stars
|
||||
|
||||
#add_to_extra_titlepage_entries:,stars
|
||||
|
|
@ -3355,6 +3405,7 @@ extracharacters:Kurt Hummel,Blaine Anderson
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.scribblehub.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:views, averageWords
|
||||
views_label:Views
|
||||
averageWords_label:Average Words (Chapter)
|
||||
|
|
@ -3366,7 +3417,11 @@ add_to_titlepage_entries:,views, averageWords
|
|||
## personal.ini and list the ones you don't want.
|
||||
#exclude_notes:authornotes,newsboxes
|
||||
|
||||
[www.silmarillionwritersguild.org]
|
||||
use_pagecache:true
|
||||
|
||||
[www.siye.co.uk]
|
||||
use_pagecache:true
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Harry Potter
|
||||
extracharacters:Harry Potter,Ginny Weasley
|
||||
|
|
@ -3406,6 +3461,9 @@ extracategories:Lord of the Rings
|
|||
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.swi.org.ru]
|
||||
use_pagecache:true
|
||||
|
||||
[www.the-sietch.com]
|
||||
## see [base_xenforoforum]
|
||||
|
||||
|
|
@ -3447,6 +3505,7 @@ extracategories:Star Trek: Voyager
|
|||
#password:yourpassword
|
||||
|
||||
[www.tthfanfic.org]
|
||||
use_pagecache:true
|
||||
user_agent:
|
||||
slow_down_sleep_time:2
|
||||
## Some sites do not require a login, but do require the user to
|
||||
|
|
@ -3542,6 +3601,7 @@ extraships:Severus Snape/Harry Potter
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.webnovel.com]
|
||||
use_pagecache:true
|
||||
## Extra metadata that this adapter knows about. See [archiveofourown.org]
|
||||
## for examples of how to use them.
|
||||
extra_valid_entries:translator, editor, sitetags
|
||||
|
|
@ -3564,6 +3624,7 @@ extra_titlepage_entries: translator, editor, sitetags
|
|||
fix_pseudo_html:false
|
||||
|
||||
[www.whofic.com]
|
||||
use_pagecache:true
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.wolverineandrogue.com]
|
||||
|
|
@ -3589,12 +3650,14 @@ extracategories:Stargate: Atlantis
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.wuxiaworld.co]
|
||||
use_pagecache:true
|
||||
## Note that wuxiaworld.co != wuxiaworld.com
|
||||
## When dedup_order_chapter_list:true, use a heuristic algorithm
|
||||
## specific to wuxiaworld.co order and dedup chapters.
|
||||
dedup_order_chapter_list:false
|
||||
|
||||
[www.wuxiaworld.com]
|
||||
use_pagecache:true
|
||||
user_agent:Mozilla/5.0
|
||||
## Authors on wuxiaworld.com create their own index pages, so it's not
|
||||
## uncommon for there to be links to chapters that haven't been
|
||||
|
|
|
|||
|
|
@ -55,13 +55,6 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
|
|
|
|||
|
|
@ -139,13 +139,6 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
return True
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
|
|
|
|||
|
|
@ -107,13 +107,6 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
return False
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
url = self.url
|
||||
|
|
|
|||
|
|
@ -97,13 +97,6 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
if not (self.is_adult or self.getConfig("is_adult")):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
|
|
|||
|
|
@ -59,13 +59,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
def stripURLParameters(cls, url):
|
||||
return url
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug("URL: "+self.url)
|
||||
|
||||
|
|
|
|||
|
|
@ -60,9 +60,6 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r'https?://chireads\.com/category/translatedtales/(?P<id>[^/]+)(/)?'
|
||||
|
||||
def use_pagecache(self):
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
|
||||
|
|
|
|||
|
|
@ -70,13 +70,6 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
|
|
|
|||
|
|
@ -138,13 +138,6 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
################################################################################################
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
|
||||
|
|
|
|||
|
|
@ -121,13 +121,6 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
return True
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
|
|
|
|||
|
|
@ -76,13 +76,6 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://(archive\.hp)?"+re.escape(self.getSiteDomain())+r"(/archive)?/viewstory\.php\?sid=\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
|
|
|
|||
|
|
@ -79,13 +79,6 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
extrasleep=extrasleep,
|
||||
usecache=usecache)
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## not actually putting urltitle on multi-chapters below, but
|
||||
## one-shots will have it, so this is still useful. normalized
|
||||
## chapter URLs do NOT contain the story title.
|
||||
|
|
|
|||
|
|
@ -68,13 +68,6 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?"+re.escape("://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Diese Geschichte wurde als entwicklungsbeeintr' in data \
|
||||
|
|
|
|||
|
|
@ -66,13 +66,6 @@ class FastNovelNetAdapter(BaseSiteAdapter):
|
|||
# https://fastnovel.net/ultimate-scheming-system-158/
|
||||
return r"https?://fastnovel\.net/(?P<id>[^/]+)"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
|
||||
|
|
|
|||
|
|
@ -60,12 +60,6 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %m %Y"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
|
|
|
|||
|
|
@ -73,13 +73,6 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
## http://fictionhunt.com/read/12411643/1
|
||||
return r"https?://(www.)?fictionhunt.com/(?P<type>read|stories)/(?P<id>[0-9a-z]+)(/(?P<title>[^/]+))?(/|/[^/]+)*/?$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def needToLoginCheck(self, data):
|
||||
## FH is apparently reporting "Story has been removed" for all
|
||||
## chapters when not logged in now.
|
||||
|
|
|
|||
|
|
@ -44,13 +44,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
# merge chapters of a story
|
||||
self.story.setMetadata('numChapters', 1)
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return FictionManiaTVAdapter.SITE_DOMAIN
|
||||
|
|
|
|||
|
|
@ -74,13 +74,6 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
return True
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
# fetch the chapter. From that we will get almost all the
|
||||
|
|
|
|||
|
|
@ -65,13 +65,6 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://(www|mobile)\.fimfiction\.(net|com)/story/\d+/?.*"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def set_adult_cookie(self):
|
||||
cookie = cl.Cookie(version=0, name='view_mature', value='true',
|
||||
port=None, port_specified=False,
|
||||
|
|
|
|||
|
|
@ -55,13 +55,6 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
|
|
|
|||
|
|
@ -57,13 +57,6 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("hentai-foundry.com/stories/user/")+r"(?P<authorId>[^/]+)/(?P<storyId>\d+)/(?P<storyURLTitle>[^/]+)" # ignore any chapter
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
|
|
|||
|
|
@ -81,13 +81,6 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
|||
# https://inkbunny.net/submissionview.php?id=1234567
|
||||
return r'https://' + re.escape(self.getSiteDomain()) + r'/(submissionview.php\?id=|s/)(?P<id>\d+)'
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def performLogin(self,url,soup):
|
||||
params = {
|
||||
'token':soup.find("input",{"name":"token"})['value'],
|
||||
|
|
|
|||
|
|
@ -83,13 +83,6 @@ class LCFanFicComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"http://"+re.escape(self.getSiteDomain())+r"/stories/([0-9]+|_earliest)/html/*(?P<id>[^/]+)"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
####################################################################################################
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
|
|
|
|||
|
|
@ -118,13 +118,6 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
# self.story.addToList('category', category.title())
|
||||
self.story.addToList('eroticatags', category.title())
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
"""
|
||||
NOTE: Some stories can have versions,
|
||||
|
|
|
|||
|
|
@ -60,13 +60,6 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
|
|
|
|||
|
|
@ -88,12 +88,6 @@ class MassEffect2InAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r'https?://(?:www\.)?masseffect2.in/publ/' + self.DOCUMENT_ID_PATTERN.pattern
|
||||
|
||||
def use_pagecache(self):
|
||||
"""Allows use of downloaded page cache. It is essential for this
|
||||
adapter, because the site does not offer a chapter URL list, and many
|
||||
pages have to be fetched and parsed repeatedly."""
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
"""Extracts chapter URLs and story metadata. Actually downloads all
|
||||
chapters, which is not exactly right, but necessary due to technical
|
||||
|
|
|
|||
|
|
@ -101,13 +101,6 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
def stripURLParameters(cls, url):
|
||||
return url
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
Using pagecache with mediaminer.org caused SSL errors in
|
||||
Calibre. I've no idea why, but not caching doesn't cause
|
||||
it...
|
||||
'''
|
||||
return False
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
|
|
|
|||
|
|
@ -95,13 +95,6 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
|
|||
# http://novelonlinefull.com/novel/stellar_transformation
|
||||
return r"https?://(novelonlinefull|lightnovelgate)\.com/novel/(?P<id>[^/]+)"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
# fetch the chapter. From that we will get almost all the
|
||||
# metadata and chapter list
|
||||
|
|
|
|||
|
|
@ -60,9 +60,6 @@ class QuotevComAdapter(BaseSiteAdapter):
|
|||
pattern = pattern.replace(r'www\.', r'(www\.)?')
|
||||
return pattern
|
||||
|
||||
def use_pagecache(self):
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
data = self.get_request(self.url)
|
||||
|
||||
|
|
|
|||
|
|
@ -89,13 +89,6 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return "https?"+re.escape("://")+r"(www\.|)royalroadl?\.com/fiction/\d+(/.*)?$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def make_soup(self,data):
|
||||
soup = super(RoyalRoadAdapter, self).make_soup(data)
|
||||
self.handle_spoilers(soup)
|
||||
|
|
|
|||
|
|
@ -90,13 +90,6 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
|||
def getSiteURLPattern(self):
|
||||
return re.escape("https://"+self.getSiteDomain())+r"/(series|read)/(?P<id>\d+)[/-](?P<title>[^/]+)"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
# Set cookie to ascending order before page loads, means we know date published
|
||||
def set_contents_cookie(self):
|
||||
cookie = cl.Cookie(version=0, name='toc_sorder', value='asc',
|
||||
|
|
|
|||
|
|
@ -66,13 +66,6 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/archive/home/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
|
|
|
|||
|
|
@ -71,13 +71,6 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://(www\.)?siye\.co\.uk/(siye/)?"+re.escape("viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
|
|
|
|||
|
|
@ -149,13 +149,6 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
username))
|
||||
raise exceptions.FailedToLogin(url,username)
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
|
||||
|
|
|
|||
|
|
@ -53,13 +53,6 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"http://" + re.escape(self.getSiteDomain() + "/mlp-fim/story/")+r"\d+"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
url=self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
|
|
|||
|
|
@ -58,13 +58,6 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
|
|||
# accept https, but don't use it--site SSL is broken.
|
||||
return r"https?:"+re.escape("//"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
|
|
|
|||
|
|
@ -52,9 +52,6 @@ class TestSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return BaseSiteAdapter.getSiteURLPattern(self)+r'/?\?sid=\d+$'
|
||||
|
||||
def use_pagecache(self):
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('extractChapterUrlsAndMetadata: %s' % self.url)
|
||||
idstr = self.story.getMetadata('storyId')
|
||||
|
|
|
|||
|
|
@ -83,13 +83,6 @@ class TrekFanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
return re.escape('https://{}'.format(
|
||||
self.getSiteDomain()))+r'/((?P<category>[^/]+)/)?(?P<author>[^/]+)/(?P<id>[^/]+)/?$'
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def get_request(self,url):
|
||||
try:
|
||||
return super(getClass(), self).get_request(url)
|
||||
|
|
|
|||
|
|
@ -63,13 +63,6 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
# tth won't send you future updates if you aren't 'caught up'
|
||||
# on the story. Login isn't required for F21, but logging in will
|
||||
# mark stories you've downloaded as 'read' on tth.
|
||||
|
|
|
|||
|
|
@ -79,9 +79,6 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
def getDateFormat(cls):
|
||||
return "%Y-%m-%dT%H:%M:%SZ"
|
||||
|
||||
def use_pagecache(self):
|
||||
return True
|
||||
|
||||
def getStoryId(self, url):
|
||||
storyIdInUrl = re.match(r'https://www\.wattpad\.com/story/(?P<storyId>\d+).*', url)
|
||||
if storyIdInUrl is not None:
|
||||
|
|
|
|||
|
|
@ -103,13 +103,6 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
# https://www.webnovel.com/book/game-of-thrones%3A-the-prideful-one._17509790806343405
|
||||
return r'https://' + re.escape(self.getSiteDomain()) + r'/book/(?P<title>.*_)?(?P<id>\d+)'
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
# Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
url = self.url
|
||||
|
|
|
|||
|
|
@ -47,13 +47,6 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?"+re.escape("://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
|
|
|
|||
|
|
@ -64,9 +64,6 @@ class WuxiaWorldCoSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r'https?://(www|m)\.wuxiaworld\.co/(?P<id>[^/]+)(/)?'
|
||||
|
||||
def use_pagecache(self):
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
|
||||
|
|
|
|||
|
|
@ -60,9 +60,6 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r'https?://%s/novel/(?P<id>[^/]+)(/)?' % re.escape(self.getSiteDomain())
|
||||
|
||||
def use_pagecache(self):
|
||||
return True
|
||||
|
||||
def _parse_linked_data(self, soup):
|
||||
# See https://json-ld.org
|
||||
tag = soup.find('script', type='application/ld+json')
|
||||
|
|
|
|||
|
|
@ -61,9 +61,6 @@ class WuxiaWorldSiteSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r'https?://%s/novel/(?P<id>[^/]+)(/)?' % re.escape(self.getSiteDomain())
|
||||
|
||||
def use_pagecache(self):
|
||||
return True
|
||||
|
||||
def _parse_linked_data(self, soup):
|
||||
# See https://json-ld.org
|
||||
tag = soup.find('script', type='application/ld+json')
|
||||
|
|
|
|||
|
|
@ -98,13 +98,6 @@ class WWWNovelAllComAdapter(BaseSiteAdapter):
|
|||
# https://www.novelall.com/chapter/The-Legendary-Moonlight-Sculptor-Volume-1-Chapter-1/1048282/
|
||||
return r"https://www\.novelall\.com/(?P<novchap>novel|chapter)/(?P<id>[^/\.]+)(/\d+/?)?(\.html)?$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
addurl = "?waring=1"
|
||||
|
|
|
|||
|
|
@ -113,13 +113,6 @@ class BaseSiteAdapter(Requestable):
|
|||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return False
|
||||
|
||||
def _section_url(self,url):
|
||||
'''
|
||||
For adapters that have story URLs that can change. This is
|
||||
|
|
|
|||
|
|
@ -185,13 +185,6 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
# logger.debug("post-url:%s"%url)
|
||||
return url
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def performLogin(self,data):
|
||||
params = {}
|
||||
|
||||
|
|
|
|||
|
|
@ -540,7 +540,7 @@ class Configuration(ConfigParser):
|
|||
self.fetcher = None # the network layer for getting pages the
|
||||
# caching layer for getting pages, created now for
|
||||
# get_empty_pagecache() etc.
|
||||
self.cache = fetcher.BaseCache()
|
||||
self.cache = fetcher.BasicCache()
|
||||
self.opener = None # used for _filelist
|
||||
|
||||
self.lightweight = lightweight
|
||||
|
|
@ -955,7 +955,7 @@ class Configuration(ConfigParser):
|
|||
|
||||
def get_fetcher(self):
|
||||
if not self.fetcher:
|
||||
logger.error(self.getConfig('use_cloudscraper'))
|
||||
logger.debug("use_cloudscraper:%s"%self.getConfig('use_cloudscraper'))
|
||||
if self.getConfig('use_cloudscraper',False):
|
||||
fetchcls = fetcher.CloudScraperFetcher
|
||||
else:
|
||||
|
|
@ -973,7 +973,9 @@ class Configuration(ConfigParser):
|
|||
# cache decorator terminates the chain when found. Cache
|
||||
# created in __init__ because of get_empty_pagecache()
|
||||
# etc, but not used until now.
|
||||
self.cache.decorate_fetcher(self.fetcher)
|
||||
logger.debug("use_pagecache:%s"%self.getConfig('use_pagecache'))
|
||||
if self.getConfig('use_pagecache'):
|
||||
fetcher.BasicCacheDecorator(self.cache).decorate_fetcher(self.fetcher)
|
||||
|
||||
if self.getConfig('progressbar'):
|
||||
fetcher.ProgressBarDecorator().decorate_fetcher(self.fetcher)
|
||||
|
|
|
|||
|
|
@ -574,6 +574,12 @@ normalize_text_links:true
|
|||
## normalize_text_links will improve URL matching considerably.
|
||||
internalize_text_links:true
|
||||
|
||||
## Of the ~140 supported sites, only ~50 have been checked to work
|
||||
## correctly with a page cache. The page cache is used to save already
|
||||
## downloaded pages which can be called more than once, especially in
|
||||
## the Calibre plugin.
|
||||
use_pagecache:false
|
||||
|
||||
[base_efiction]
|
||||
|
||||
## At the time of writing, eFiction Base adapters allow downloading
|
||||
|
|
@ -588,6 +594,7 @@ storynotes_label:Story Notes
|
|||
add_to_extra_titlepage_entries:,storynotes
|
||||
|
||||
[base_xenforoforum]
|
||||
use_pagecache:true
|
||||
## Some sites require login for some stories
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
|
@ -1150,7 +1157,7 @@ windows_eol: true
|
|||
## URLs like: http://test1.com?sid=12345
|
||||
|
||||
[test1.com]
|
||||
|
||||
use_pagecache:true
|
||||
extratags: FanFiction,Testing
|
||||
|
||||
# extracategories:Fafner
|
||||
|
|
@ -1268,6 +1275,7 @@ website_encodings:Windows-1252,utf8,iso-8859-1
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[archiveofourown.org]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1435,6 +1443,7 @@ extraships:Severus Snape/Hermione Granger
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[bloodshedverse.com]
|
||||
use_pagecache:true
|
||||
## website encoding(s) In theory, each website reports the character
|
||||
## encoding they use for each page. In practice, some sites report it
|
||||
## incorrectly. Each adapter has a default list, usually "utf8,
|
||||
|
|
@ -1511,7 +1520,11 @@ cover_exclusion_regexp:/images/.*?ribbon.gif
|
|||
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[chireads.com]
|
||||
use_pagecache:true
|
||||
|
||||
[chosentwofanfic.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:disclaimer
|
||||
disclaimer_label: Disclaimer
|
||||
add_to_titlepage_entries:,disclaimer
|
||||
|
|
@ -1623,6 +1636,7 @@ extracategories:Harry Potter
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[fanfic.tenhawkpresents.ink]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1633,6 +1647,7 @@ website_encodings:Windows-1252,utf8
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[fanficauthors.net]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1641,6 +1656,7 @@ website_encodings:Windows-1252,utf8
|
|||
#password:yourpassword
|
||||
|
||||
[fanfics.me]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1668,6 +1684,7 @@ make_linkhtml_entries:translators,betas
|
|||
include_in_category:fandoms
|
||||
|
||||
[fanfictalk.com]
|
||||
use_pagecache:true
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. In commandline version, this should go in your
|
||||
## personal.ini, not defaults.ini.
|
||||
|
|
@ -1736,6 +1753,9 @@ add_to_keep_html_attrs:,style
|
|||
[fanfiction-junkies.de]
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[fastnovel.net]
|
||||
use_pagecache:true
|
||||
|
||||
[fiction.live]
|
||||
## Recommended if you include images, fiction.live tends to have many
|
||||
## duplicated images.
|
||||
|
|
@ -1786,6 +1806,7 @@ add_to_output_css:
|
|||
}
|
||||
|
||||
[fictionhunt.com]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1808,6 +1829,7 @@ make_linkhtml_entries:origin
|
|||
add_to_extra_titlepage_entries:originHTML
|
||||
|
||||
[fictionmania.tv]
|
||||
use_pagecache:true
|
||||
website_encodings:ISO-8859-1,auto
|
||||
|
||||
## Extra metadata that this adapter knows about. See [archiveofourown.org]
|
||||
|
|
@ -1848,6 +1870,7 @@ likes_label:Likes
|
|||
dislikes_label:Dislikes
|
||||
|
||||
[ficwad.com]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -1947,6 +1970,7 @@ reader_posts_per_page:30
|
|||
#password:yourpassword
|
||||
|
||||
[harrypotterfanfiction.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:reviews,era
|
||||
|
||||
## Site dedicated to these categories/characters/ships
|
||||
|
|
@ -1980,6 +2004,7 @@ website_encodings:Windows-1252,utf8
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[inkbunny.net]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -2015,10 +2040,12 @@ extra_titlepage_entries:universe,crossoverfandom
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[lcfanfic.com]
|
||||
use_pagecache:true
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Lois & Clark: The New Adventures of Superman
|
||||
|
||||
[literotica.com]
|
||||
use_pagecache:true
|
||||
user_agent:
|
||||
extra_valid_entries:eroticatags,averrating
|
||||
eroticatags_label:Erotica Tags
|
||||
|
|
@ -2103,6 +2130,7 @@ extracharacters:Carol,Daryl
|
|||
extraships:Carol/Daryl
|
||||
|
||||
[novelonlinefull.com]
|
||||
use_pagecache:true
|
||||
website_encodings: utf8:ignore, Windows-1252, iso-8859-1
|
||||
|
||||
## Clear FanFiction from defaults, site is original fiction.
|
||||
|
|
@ -2155,6 +2183,7 @@ extracategories:The Pretender
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[quotev.com]
|
||||
use_pagecache:true
|
||||
user_agent:Mozilla/5.0
|
||||
slow_down_sleep_time:2
|
||||
extra_valid_entries:pages,readers,reads,favorites,searchtags,comments
|
||||
|
|
@ -2446,6 +2475,7 @@ slow_down_sleep_time:2
|
|||
#password:yourpassword
|
||||
|
||||
[storiesonline.net]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -2576,6 +2606,7 @@ readings_label: Readings
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[trekfanfiction.net]
|
||||
use_pagecache:true
|
||||
website_encodings:utf8,Windows-1252,iso-8859-1
|
||||
|
||||
[trekiverse.org]
|
||||
|
|
@ -2613,6 +2644,7 @@ reviews_label:Reviews
|
|||
readings_label:Readings
|
||||
|
||||
[wattpad.com]
|
||||
use_pagecache:true
|
||||
#is_adult:true
|
||||
extra_titlepage_entries: language, reads
|
||||
extra_valid_entries: language, tags, reads
|
||||
|
|
@ -2626,7 +2658,11 @@ add_to_comma_entries:,reads
|
|||
## are reports that sound like site throttling without it.
|
||||
slow_down_sleep_time:2
|
||||
|
||||
[wuxiaworld.site]
|
||||
use_pagecache:true
|
||||
|
||||
[www.adastrafanfic.com]
|
||||
use_pagecache:true
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
|
|
@ -2675,6 +2711,7 @@ website_encodings:Windows-1252,utf8
|
|||
strip_text_links:true
|
||||
|
||||
[www.asianfanfics.com]
|
||||
use_pagecache:true
|
||||
## Unlike most sites, asianfanfics.com, instead of denying access to
|
||||
## 'adult' or subscriber-only content, will censor the text of stories
|
||||
## to remove 'adult' words or entire portions of the text. This is why
|
||||
|
|
@ -2715,6 +2752,7 @@ include_in_category:tags
|
|||
auto_sub:false
|
||||
|
||||
[www.bdsmlibrary.com]
|
||||
use_pagecache:true
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. Uncomment by removing '#' in front of is_adult.
|
||||
#is_adult:true
|
||||
|
|
@ -2795,6 +2833,7 @@ type_label:Type of Couple
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.fanfiction.net]
|
||||
use_pagecache:true
|
||||
## Using cloudscraper can satisfy the first couple levels of
|
||||
## Cloudflare bot-proofing, but not all levels. Older versions of
|
||||
## OpenSSL will also raise problems, so versions of Calibre older than
|
||||
|
|
@ -2850,6 +2889,7 @@ dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
|||
check_next_chapter:false
|
||||
|
||||
[www.fanfiktion.de]
|
||||
use_pagecache:true
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
|
|
@ -2862,6 +2902,7 @@ check_next_chapter:false
|
|||
extra_valid_entries:native_status
|
||||
|
||||
[www.ficbook.net]
|
||||
use_pagecache:true
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
|
|
@ -2933,6 +2974,7 @@ datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
|
|||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
[www.fimfiction.net]
|
||||
use_pagecache:true
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
|
|
@ -3039,6 +3081,7 @@ add_to_titlepage_entries:,growth, shrink, sizeroles
|
|||
#password:yourpassword
|
||||
|
||||
[www.hentai-foundry.com]
|
||||
use_pagecache:true
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
|
|
@ -3125,6 +3168,7 @@ extracategories:Lord of the Rings
|
|||
#password:yourpassword
|
||||
|
||||
[www.lotrgfic.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:places, times
|
||||
places_label: Places
|
||||
times_label:Times
|
||||
|
|
@ -3152,6 +3196,7 @@ eroticatags_label:Erotica Tags
|
|||
extra_titlepage_entries:eroticatags
|
||||
|
||||
[www.masseffect2.in]
|
||||
use_pagecache:true
|
||||
## Site dedicated to this fandom.
|
||||
extracategories: Mass Effect
|
||||
|
||||
|
|
@ -3178,6 +3223,9 @@ adult_ratings: E,R
|
|||
website_encodings:utf8
|
||||
|
||||
[www.mediaminer.org]
|
||||
## Using pagecache with mediaminer.org caused SSL errors in Calibre.
|
||||
## I've no idea why, but turning off explicitly.
|
||||
use_pagecache:false
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
## Note that mediaminer doesn't give datePublished on the story's
|
||||
## index page--it's collected from the earliest uploaded chapter. So
|
||||
|
|
@ -3243,6 +3291,7 @@ extracategories:Naruto
|
|||
extracategories:NCIS
|
||||
|
||||
[www.novelall.com]
|
||||
use_pagecache:true
|
||||
website_encodings: utf8:ignore, Windows-1252, iso-8859-1
|
||||
|
||||
## Clear FanFiction from defaults, site is original fiction.
|
||||
|
|
@ -3328,6 +3377,7 @@ extracategories:Queer as Folk
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.royalroad.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:stars
|
||||
|
||||
#add_to_extra_titlepage_entries:,stars
|
||||
|
|
@ -3377,6 +3427,7 @@ extracharacters:Kurt Hummel,Blaine Anderson
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.scribblehub.com]
|
||||
use_pagecache:true
|
||||
extra_valid_entries:views, averageWords
|
||||
views_label:Views
|
||||
averageWords_label:Average Words (Chapter)
|
||||
|
|
@ -3388,7 +3439,11 @@ add_to_titlepage_entries:,views, averageWords
|
|||
## personal.ini and list the ones you don't want.
|
||||
#exclude_notes:authornotes,newsboxes
|
||||
|
||||
[www.silmarillionwritersguild.org]
|
||||
use_pagecache:true
|
||||
|
||||
[www.siye.co.uk]
|
||||
use_pagecache:true
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Harry Potter
|
||||
extracharacters:Harry Potter,Ginny Weasley
|
||||
|
|
@ -3428,6 +3483,9 @@ extracategories:Lord of the Rings
|
|||
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.swi.org.ru]
|
||||
use_pagecache:true
|
||||
|
||||
[www.the-sietch.com]
|
||||
## see [base_xenforoforum]
|
||||
|
||||
|
|
@ -3469,6 +3527,7 @@ extracategories:Star Trek: Voyager
|
|||
#password:yourpassword
|
||||
|
||||
[www.tthfanfic.org]
|
||||
use_pagecache:true
|
||||
user_agent:
|
||||
slow_down_sleep_time:2
|
||||
## Some sites do not require a login, but do require the user to
|
||||
|
|
@ -3564,6 +3623,7 @@ extraships:Severus Snape/Harry Potter
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.webnovel.com]
|
||||
use_pagecache:true
|
||||
## Extra metadata that this adapter knows about. See [archiveofourown.org]
|
||||
## for examples of how to use them.
|
||||
extra_valid_entries:translator, editor, sitetags
|
||||
|
|
@ -3586,6 +3646,7 @@ extra_titlepage_entries: translator, editor, sitetags
|
|||
fix_pseudo_html:false
|
||||
|
||||
[www.whofic.com]
|
||||
use_pagecache:true
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.wolverineandrogue.com]
|
||||
|
|
@ -3611,12 +3672,14 @@ extracategories:Stargate: Atlantis
|
|||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.wuxiaworld.co]
|
||||
use_pagecache:true
|
||||
## Note that wuxiaworld.co != wuxiaworld.com
|
||||
## When dedup_order_chapter_list:true, use a heuristic algorithm
|
||||
## specific to wuxiaworld.co to order and dedup chapters.
|
||||
dedup_order_chapter_list:false
|
||||
|
||||
[www.wuxiaworld.com]
|
||||
use_pagecache:true
|
||||
user_agent:Mozilla/5.0
|
||||
## Authors on wuxiaworld.com create their own index pages, so it's not
|
||||
## uncommon for there to be links to chapters that haven't been
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import logging
|
|||
import sys
|
||||
import pickle
|
||||
from functools import partial
|
||||
import threading
|
||||
|
||||
from urllib3.util.retry import Retry
|
||||
import requests
|
||||
|
|
@ -164,9 +165,9 @@ class SleepDecorator(FetcherDecorator):
|
|||
|
||||
return fetchresp
|
||||
|
||||
class BaseCache(FetcherDecorator):
|
||||
class BasicCache(object):
|
||||
def __init__(self):
|
||||
super(BaseCache,self).__init__()
|
||||
self.cache_lock = threading.RLock()
|
||||
self.pagecache = self.get_empty_pagecache()
|
||||
self.save_cache_file = None
|
||||
|
||||
|
|
@ -174,34 +175,42 @@ class BaseCache(FetcherDecorator):
|
|||
return {}
|
||||
|
||||
def get_pagecache(self):
|
||||
return self.pagecache
|
||||
with self.cache_lock:
|
||||
return self.pagecache
|
||||
|
||||
def set_pagecache(self,d,save_cache_file=None):
|
||||
self.save_cache_file = save_cache_file
|
||||
self.pagecache=d
|
||||
with self.cache_lock:
|
||||
self.save_cache_file = save_cache_file
|
||||
self.pagecache=d
|
||||
|
||||
def make_cachekey(self, url, parameters=None):
|
||||
keylist=[url]
|
||||
if parameters != None:
|
||||
keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items())))
|
||||
return unicode('?'.join(keylist))
|
||||
with self.cache_lock:
|
||||
keylist=[url]
|
||||
if parameters != None:
|
||||
keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items())))
|
||||
return unicode('?'.join(keylist))
|
||||
|
||||
def has_cachekey(self,cachekey):
|
||||
return self.use_pagecache and cachekey in self.get_pagecache()
|
||||
with self.cache_lock:
|
||||
return cachekey in self.get_pagecache()
|
||||
|
||||
def get_from_cache(self,cachekey):
|
||||
if self.use_pagecache:
|
||||
return self.get_pagecache().get(cachekey)
|
||||
else:
|
||||
return None
|
||||
with self.cache_lock:
|
||||
return self.get_pagecache().get(cachekey,None)
|
||||
|
||||
def set_to_cache(self,cachekey,data,redirectedurl):
|
||||
if self.use_pagecache:
|
||||
with self.cache_lock:
|
||||
self.get_pagecache()[cachekey] = (data,ensure_text(redirectedurl))
|
||||
if self.save_cache_file:
|
||||
with open(self.save_cache_file,'wb') as jout:
|
||||
pickle.dump(self.get_pagecache(),jout,protocol=2)
|
||||
|
||||
|
||||
class BasicCacheDecorator(FetcherDecorator):
|
||||
def __init__(self,cache):
|
||||
super(BasicCacheDecorator,self).__init__()
|
||||
self.cache = cache
|
||||
|
||||
def fetcher_do_request(self,
|
||||
fetcher,
|
||||
chainfn,
|
||||
|
|
@ -216,12 +225,12 @@ class BaseCache(FetcherDecorator):
|
|||
Note that usecache=False prevents lookup, but cache still saves
|
||||
result
|
||||
'''
|
||||
logger.debug("BaseCache fetcher_do_request")
|
||||
cachekey=self.make_cachekey(url, parameters)
|
||||
logger.debug("BasicCacheDecorator fetcher_do_request")
|
||||
cachekey=self.cache.make_cachekey(url, parameters)
|
||||
|
||||
if usecache and self.has_cachekey(cachekey) and not cachekey.startswith('file:'):
|
||||
if usecache and self.cache.has_cachekey(cachekey) and not cachekey.startswith('file:'):
|
||||
logger.debug("#####################################\npagecache(%s) HIT: %s"%(method,safe_url(cachekey)))
|
||||
data,redirecturl = self.get_from_cache(cachekey)
|
||||
data,redirecturl = self.cache.get_from_cache(cachekey)
|
||||
return FetcherResponse(data,redirecturl=redirecturl,fromcache=True)
|
||||
|
||||
logger.debug("#####################################\npagecache(%s) MISS: %s"%(method,safe_url(cachekey)))
|
||||
|
|
@ -241,9 +250,9 @@ class BaseCache(FetcherDecorator):
|
|||
## saved-cache and wondering why file changes aren't showing
|
||||
## up.
|
||||
if not fetchresp.fromcache:
|
||||
self.set_to_cache(cachekey,data,fetchresp.redirecturl)
|
||||
self.cache.set_to_cache(cachekey,data,fetchresp.redirecturl)
|
||||
if url != fetchresp.redirecturl: # cache both?
|
||||
self.set_to_cache(cachekey,data,url)
|
||||
self.cache.set_to_cache(cachekey,data,url)
|
||||
return fetchresp
|
||||
|
||||
class FetcherResponse(object):
|
||||
|
|
|
|||
|
|
@ -26,11 +26,6 @@ class Requestable(Configurable):
|
|||
def __init__(self, configuration):
|
||||
Configurable.__init__(self,configuration)
|
||||
|
||||
## use_pagecache() is on adapters--not all have been updated
|
||||
## to deal with caching correctly
|
||||
if hasattr(self, 'use_pagecache'):
|
||||
self.configuration.cache.use_pagecache = self.use_pagecache()
|
||||
|
||||
## website encoding(s)--in theory, each website reports the character
|
||||
## encoding they use for each page. In practice, some sites report it
|
||||
## incorrectly. Each adapter has a default list, usually "utf8,
|
||||
|
|
|
|||
Loading…
Reference in a new issue