diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index e86a9472..5d1ce2ac 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -145,7 +145,7 @@ include_tocpage: true ## entries to make epub subjects and calibre tags ## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d" -include_subject_tags: extratags, genre, category, characters, ships, status + include_subject_tags: extratags, genre, category, characters, ships, status ## extra tags (comma separated) to include, primarily for epub. extratags: FanFiction @@ -547,6 +547,12 @@ normalize_text_links:true ## normalize_text_links will improve URL matching considerably. internalize_text_links:true +## Of the ~140 supported sites, only ~50 have been checked to work +## correctly with a page cache. The page cache is used to save already +## downloaded pages which can be called more than once, especially in +## the Calibre plugin. +use_pagecache:false + [base_efiction] ## At the time of writing, eFiction Base adapters allow downloading @@ -561,6 +567,7 @@ storynotes_label:Story Notes add_to_extra_titlepage_entries:,storynotes [base_xenforoforum] +use_pagecache:true ## Some sites require login for some stories #username:YourName #password:yourpassword @@ -1119,7 +1126,7 @@ windows_eol: true ## URLs like: http://test1.com?sid=12345 [test1.com] - +use_pagecache:true extratags: FanFiction,Testing # extracategories:Fafner @@ -1237,6 +1244,7 @@ website_encodings:Windows-1252,utf8 website_encodings:Windows-1252,utf8 [archiveofourown.org] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. 
In ## commandline version, this should go in your personal.ini, not @@ -1404,6 +1412,7 @@ extraships:Severus Snape/Hermione Granger website_encodings:Windows-1252,utf8 [bloodshedverse.com] +use_pagecache:true ## website encoding(s) In theory, each website reports the character ## encoding they use for each page. In practice, some sites report it ## incorrectly. Each adapter has a default list, usually "utf8, @@ -1480,7 +1489,11 @@ cover_exclusion_regexp:/images/.*?ribbon.gif website_encodings:Windows-1252,utf8 +[chireads.com] +use_pagecache:true + [chosentwofanfic.com] +use_pagecache:true extra_valid_entries:disclaimer disclaimer_label: Disclaimer add_to_titlepage_entries:,disclaimer @@ -1592,6 +1605,7 @@ extracategories:Harry Potter website_encodings:Windows-1252,utf8 [fanfic.tenhawkpresents.ink] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1602,6 +1616,7 @@ website_encodings:Windows-1252,utf8 website_encodings:Windows-1252,utf8 [fanficauthors.net] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1610,6 +1625,7 @@ website_encodings:Windows-1252,utf8 #password:yourpassword [fanfics.me] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1637,6 +1653,7 @@ make_linkhtml_entries:translators,betas include_in_category:fandoms [fanfictalk.com] +use_pagecache:true ## Some sites also require the user to confirm they are adult for ## adult content. In commandline version, this should go in your ## personal.ini, not defaults.ini. 
@@ -1705,6 +1722,9 @@ add_to_keep_html_attrs:,style [fanfiction-junkies.de] website_encodings:Windows-1252,utf8 +[fastnovel.net] +use_pagecache:true + [fiction.live] ## Recommended if you include images, fiction.live tends to have many ## duplicated images. @@ -1755,6 +1775,7 @@ add_to_output_css: } [fictionhunt.com] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1777,6 +1798,7 @@ make_linkhtml_entries:origin add_to_extra_titlepage_entries:originHTML [fictionmania.tv] +use_pagecache:true website_encodings:ISO-8859-1,auto ## Extra metadata that this adapter knows about. See [archiveofourown.org] @@ -1817,6 +1839,7 @@ likes_label:Likes dislikes_label:Dislikes [ficwad.com] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1916,6 +1939,7 @@ reader_posts_per_page:30 #password:yourpassword [harrypotterfanfiction.com] +use_pagecache:true extra_valid_entries:reviews,era ## Site dedicated to these categories/characters/ships @@ -1949,6 +1973,7 @@ website_encodings:Windows-1252,utf8 website_encodings:Windows-1252,utf8 [inkbunny.net] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. 
In ## commandline version, this should go in your personal.ini, not @@ -1984,10 +2009,12 @@ extra_titlepage_entries:universe,crossoverfandom website_encodings:Windows-1252,utf8 [lcfanfic.com] +use_pagecache:true ## Site dedicated to these categories/characters/ships extracategories:Lois & Clark: The New Adventures of Superman [literotica.com] +use_pagecache:true user_agent: extra_valid_entries:eroticatags,averrating eroticatags_label:Erotica Tags @@ -2072,6 +2099,7 @@ extracharacters:Carol,Daryl extraships:Carol/Daryl [novelonlinefull.com] +use_pagecache:true website_encodings: utf8:ignore, Windows-1252, iso-8859-1 ## Clear FanFiction from defaults, site is original fiction. @@ -2124,6 +2152,7 @@ extracategories:The Pretender website_encodings:Windows-1252,utf8 [quotev.com] +use_pagecache:true user_agent:Mozilla/5.0 slow_down_sleep_time:2 extra_valid_entries:pages,readers,reads,favorites,searchtags,comments @@ -2415,6 +2444,7 @@ slow_down_sleep_time:2 #password:yourpassword [storiesonline.net] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -2545,6 +2575,7 @@ readings_label: Readings website_encodings:Windows-1252,utf8 [trekfanfiction.net] +use_pagecache:true website_encodings:utf8,Windows-1252,iso-8859-1 [trekiverse.org] @@ -2582,6 +2613,7 @@ reviews_label:Reviews readings_label:Readings [wattpad.com] +use_pagecache:true #is_adult:true extra_titlepage_entries: language, reads extra_valid_entries: language, tags, reads @@ -2595,7 +2627,11 @@ add_to_comma_entries:,reads ## are reports that sound like site throttling without it. slow_down_sleep_time:2 +[wuxiaworld.site] +use_pagecache:true + [www.adastrafanfic.com] +use_pagecache:true ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. 
In commandline version, ## this should go in your personal.ini, not defaults.ini. @@ -2644,6 +2680,7 @@ website_encodings:Windows-1252,utf8 strip_text_links:true [www.asianfanfics.com] +use_pagecache:true ## Unlike most sites, asianfanfics.com, instead of denying access to ## 'adult' or subscriber-only content, will censor the text of stories ## to remove 'adult' words or entire portions of the text. This is why @@ -2684,6 +2721,7 @@ include_in_category:tags auto_sub:false [www.bdsmlibrary.com] +use_pagecache:true ## Some sites also require the user to confirm they are adult for ## adult content. Uncomment by removing '#' in front of is_adult. #is_adult:true @@ -2764,6 +2802,7 @@ type_label:Type of Couple website_encodings:Windows-1252,utf8 [www.fanfiction.net] +use_pagecache:true ## Using cloudscraper can satisfy the first couple levels of ## Cloudflare bot-proofing, but not all levels. Older versions of ## OpenSSL will also raise problems, so versions of Calibre older than @@ -2828,6 +2867,7 @@ dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S check_next_chapter:false [www.fanfiktion.de] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -2840,6 +2880,7 @@ check_next_chapter:false extra_valid_entries:native_status [www.ficbook.net] +use_pagecache:true ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. @@ -2911,6 +2952,7 @@ datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S [www.fimfiction.net] +use_pagecache:true ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. 
@@ -3017,6 +3059,7 @@ add_to_titlepage_entries:,growth, shrink, sizeroles #password:yourpassword [www.hentai-foundry.com] +use_pagecache:true ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. @@ -3103,6 +3146,7 @@ extracategories:Lord of the Rings #password:yourpassword [www.lotrgfic.com] +use_pagecache:true extra_valid_entries:places, times places_label: Places times_label:Times @@ -3130,6 +3174,7 @@ eroticatags_label:Erotica Tags extra_titlepage_entries:eroticatags [www.masseffect2.in] +use_pagecache:true ## Site dedicated to this fandom. extracategories: Mass Effect @@ -3156,6 +3201,9 @@ adult_ratings: E,R website_encodings:utf8 [www.mediaminer.org] +## Using pagecache with mediaminer.org caused SSL errors in Calibre. +## I've no idea why, but turning off explicitly. +use_pagecache:false dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S ## Note that mediaminer doesn't give datePublished on the story's ## index page--it's collected from the earliest uploaded chapter. So @@ -3221,6 +3269,7 @@ extracategories:Naruto extracategories:NCIS [www.novelall.com] +use_pagecache:true website_encodings: utf8:ignore, Windows-1252, iso-8859-1 ## Clear FanFiction from defaults, site is original fiction. @@ -3306,6 +3355,7 @@ extracategories:Queer as Folk website_encodings:Windows-1252,utf8 [www.royalroad.com] +use_pagecache:true extra_valid_entries:stars #add_to_extra_titlepage_entries:,stars @@ -3355,6 +3405,7 @@ extracharacters:Kurt Hummel,Blaine Anderson website_encodings:Windows-1252,utf8 [www.scribblehub.com] +use_pagecache:true extra_valid_entries:views, averageWords views_label:Views averageWords_label:Average Words (Chapter) @@ -3366,7 +3417,11 @@ add_to_titlepage_entries:,views, averageWords ## personal.ini and list the ones you don't want. 
#exclude_notes:authornotes,newsboxes +[www.silmarillionwritersguild.org] +use_pagecache:true + [www.siye.co.uk] +use_pagecache:true ## Site dedicated to these categories/characters/ships extracategories:Harry Potter extracharacters:Harry Potter,Ginny Weasley @@ -3406,6 +3461,9 @@ extracategories:Lord of the Rings website_encodings:Windows-1252,utf8 +[www.swi.org.ru] +use_pagecache:true + [www.the-sietch.com] ## see [base_xenforoforum] @@ -3447,6 +3505,7 @@ extracategories:Star Trek: Voyager #password:yourpassword [www.tthfanfic.org] +use_pagecache:true user_agent: slow_down_sleep_time:2 ## Some sites do not require a login, but do require the user to @@ -3542,6 +3601,7 @@ extraships:Severus Snape/Harry Potter website_encodings:Windows-1252,utf8 [www.webnovel.com] +use_pagecache:true ## Extra metadata that this adapter knows about. See [archiveofourown.org] ## for examples of how to use them. extra_valid_entries:translator, editor, sitetags @@ -3564,6 +3624,7 @@ extra_titlepage_entries: translator, editor, sitetags fix_pseudo_html:false [www.whofic.com] +use_pagecache:true website_encodings:Windows-1252,utf8 [www.wolverineandrogue.com] @@ -3589,12 +3650,14 @@ extracategories:Stargate: Atlantis website_encodings:Windows-1252,utf8 [www.wuxiaworld.co] +use_pagecache:true ## Note that wuxiaworld.co != wuxiaworld.com ## When dedup_order_chapter_list:true, use a heuristic algorithm ## specific to wuxiaworld.co order and dedup chapters. 
dedup_order_chapter_list:false [www.wuxiaworld.com] +use_pagecache:true user_agent:Mozilla/5.0 ## Authors on wuxiaworld.com create their own index pages, so it's not ## uncommon for there to be links to chapters that haven't been diff --git a/fanficfare/adapters/adapter_adastrafanficcom.py b/fanficfare/adapters/adapter_adastrafanficcom.py index 0c37c908..cff3de59 100644 --- a/fanficfare/adapters/adapter_adastrafanficcom.py +++ b/fanficfare/adapters/adapter_adastrafanficcom.py @@ -55,13 +55,6 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): if self.is_adult or self.getConfig("is_adult"): diff --git a/fanficfare/adapters/adapter_archiveofourownorg.py b/fanficfare/adapters/adapter_archiveofourownorg.py index 83efb047..e2522d63 100644 --- a/fanficfare/adapters/adapter_archiveofourownorg.py +++ b/fanficfare/adapters/adapter_archiveofourownorg.py @@ -139,13 +139,6 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): else: return True - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): diff --git a/fanficfare/adapters/adapter_asianfanficscom.py b/fanficfare/adapters/adapter_asianfanficscom.py index 3588b89c..61df675e 100644 --- a/fanficfare/adapters/adapter_asianfanficscom.py +++ b/fanficfare/adapters/adapter_asianfanficscom.py @@ -107,13 +107,6 @@ class AsianFanFicsComAdapter(BaseSiteAdapter): else: return False - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - ## Getting the chapter list and the meta data, plus 'is adult' checking. def doExtractChapterUrlsAndMetadata(self,get_cover=True): url = self.url diff --git a/fanficfare/adapters/adapter_bdsmlibrarycom.py b/fanficfare/adapters/adapter_bdsmlibrarycom.py index 5dfba911..fcabf674 100644 --- a/fanficfare/adapters/adapter_bdsmlibrarycom.py +++ b/fanficfare/adapters/adapter_bdsmlibrarycom.py @@ -97,13 +97,6 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?://"+re.escape(self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): if not (self.is_adult or self.getConfig("is_adult")): raise exceptions.AdultCheckRequired(self.url) diff --git a/fanficfare/adapters/adapter_bloodshedversecom.py b/fanficfare/adapters/adapter_bloodshedversecom.py index e487451b..0287a41d 100644 --- a/fanficfare/adapters/adapter_bloodshedversecom.py +++ b/fanficfare/adapters/adapter_bloodshedversecom.py @@ -59,13 +59,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter): def stripURLParameters(cls, url): return url - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): logger.debug("URL: "+self.url) diff --git a/fanficfare/adapters/adapter_chireadscom.py b/fanficfare/adapters/adapter_chireadscom.py index a29230b7..05543c43 100644 --- a/fanficfare/adapters/adapter_chireadscom.py +++ b/fanficfare/adapters/adapter_chireadscom.py @@ -60,9 +60,6 @@ class ChireadsComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r'https?://chireads\.com/category/translatedtales/(?P[^/]+)(/)?' 
- def use_pagecache(self): - return True - def extractChapterUrlsAndMetadata(self): logger.debug('URL: %s', self.url) diff --git a/fanficfare/adapters/adapter_chosentwofanficcom.py b/fanficfare/adapters/adapter_chosentwofanficcom.py index 2d56071e..ca87af5c 100644 --- a/fanficfare/adapters/adapter_chosentwofanficcom.py +++ b/fanficfare/adapters/adapter_chosentwofanficcom.py @@ -70,13 +70,6 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): diff --git a/fanficfare/adapters/adapter_fanficauthorsnet.py b/fanficfare/adapters/adapter_fanficauthorsnet.py index 2063d82f..f5e2118c 100644 --- a/fanficfare/adapters/adapter_fanficauthorsnet.py +++ b/fanficfare/adapters/adapter_fanficauthorsnet.py @@ -138,13 +138,6 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/' - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - ################################################################################################ def doExtractChapterUrlsAndMetadata(self, get_cover=True): diff --git a/fanficfare/adapters/adapter_fanficsme.py b/fanficfare/adapters/adapter_fanficsme.py index 188d573a..dd14d7aa 100644 --- a/fanficfare/adapters/adapter_fanficsme.py +++ b/fanficfare/adapters/adapter_fanficsme.py @@ -121,13 +121,6 @@ class FanFicsMeAdapter(BaseSiteAdapter): else: return True - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): diff --git a/fanficfare/adapters/adapter_fanfictalkcom.py b/fanficfare/adapters/adapter_fanfictalkcom.py index c1e74920..fc18bda6 100644 --- a/fanficfare/adapters/adapter_fanfictalkcom.py +++ b/fanficfare/adapters/adapter_fanfictalkcom.py @@ -76,13 +76,6 @@ class FanfictalkComAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?://(archive\.hp)?"+re.escape(self.getSiteDomain())+r"(/archive)?/viewstory\.php\?sid=\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): diff --git a/fanficfare/adapters/adapter_fanfictionnet.py b/fanficfare/adapters/adapter_fanfictionnet.py index fe6ce5d6..3e4bf7d0 100644 --- a/fanficfare/adapters/adapter_fanfictionnet.py +++ b/fanficfare/adapters/adapter_fanfictionnet.py @@ -79,13 +79,6 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): extrasleep=extrasleep, usecache=usecache) - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - ## not actually putting urltitle on multi-chapters below, but ## one-shots will have it, so this is still useful. normalized ## chapter URLs do NOT contain the story title. diff --git a/fanficfare/adapters/adapter_fanfiktionde.py b/fanficfare/adapters/adapter_fanfiktionde.py index 1cee9b2e..fdc97745 100644 --- a/fanficfare/adapters/adapter_fanfiktionde.py +++ b/fanficfare/adapters/adapter_fanfiktionde.py @@ -68,13 +68,6 @@ class FanFiktionDeAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?"+re.escape("://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - ## Login seems to be reasonably standard across eFiction sites. def needToLoginCheck(self, data): if 'Diese Geschichte wurde als entwicklungsbeeintr' in data \ diff --git a/fanficfare/adapters/adapter_fastnovelnet.py b/fanficfare/adapters/adapter_fastnovelnet.py index 44dd25c2..9dc1027d 100644 --- a/fanficfare/adapters/adapter_fastnovelnet.py +++ b/fanficfare/adapters/adapter_fastnovelnet.py @@ -66,13 +66,6 @@ class FastNovelNetAdapter(BaseSiteAdapter): # https://fastnovel.net/ultimate-scheming-system-158/ return r"https?://fastnovel\.net/(?P[^/]+)" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - def extractChapterUrlsAndMetadata(self): logger.debug('URL: %s', self.url) diff --git a/fanficfare/adapters/adapter_ficbooknet.py b/fanficfare/adapters/adapter_ficbooknet.py index 83cd5e2f..5cc84611 100644 --- a/fanficfare/adapters/adapter_ficbooknet.py +++ b/fanficfare/adapters/adapter_ficbooknet.py @@ -60,12 +60,6 @@ class FicBookNetAdapter(BaseSiteAdapter): # http://docs.python.org/library/datetime.html#strftime-strptime-behavior self.dateformat = "%d %m %Y" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True @staticmethod # must be @staticmethod, don't remove it. def getSiteDomain(): # The site domain. Does have www here, if it uses it. diff --git a/fanficfare/adapters/adapter_fictionhuntcom.py b/fanficfare/adapters/adapter_fictionhuntcom.py index 8aaf9063..9b84782c 100644 --- a/fanficfare/adapters/adapter_fictionhuntcom.py +++ b/fanficfare/adapters/adapter_fictionhuntcom.py @@ -73,13 +73,6 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter): ## http://fictionhunt.com/read/12411643/1 return r"https?://(www.)?fictionhunt.com/(?Pread|stories)/(?P[0-9a-z]+)(/(?P[^/]+))?(/|/[^/]+)*/?$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def needToLoginCheck(self, data): ## FH is apparently reporting "Story has been removed" for all ## chapters when not logged in now. 
diff --git a/fanficfare/adapters/adapter_fictionmaniatv.py b/fanficfare/adapters/adapter_fictionmaniatv.py index f2fbdff7..29a05cd0 100644 --- a/fanficfare/adapters/adapter_fictionmaniatv.py +++ b/fanficfare/adapters/adapter_fictionmaniatv.py @@ -44,13 +44,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter): # merge chapters of a story self.story.setMetadata('numChapters', 1) - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - @staticmethod def getSiteDomain(): return FictionManiaTVAdapter.SITE_DOMAIN diff --git a/fanficfare/adapters/adapter_ficwadcom.py b/fanficfare/adapters/adapter_ficwadcom.py index c1d8321d..ac240a31 100644 --- a/fanficfare/adapters/adapter_ficwadcom.py +++ b/fanficfare/adapters/adapter_ficwadcom.py @@ -74,13 +74,6 @@ class FicwadComSiteAdapter(BaseSiteAdapter): else: return True - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): # fetch the chapter. From that we will get almost all the diff --git a/fanficfare/adapters/adapter_fimfictionnet.py b/fanficfare/adapters/adapter_fimfictionnet.py index 180e8433..e93b03d9 100644 --- a/fanficfare/adapters/adapter_fimfictionnet.py +++ b/fanficfare/adapters/adapter_fimfictionnet.py @@ -65,13 +65,6 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?://(www|mobile)\.fimfiction\.(net|com)/story/\d+/?.*" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - def set_adult_cookie(self): cookie = cl.Cookie(version=0, name='view_mature', value='true', port=None, port_specified=False, diff --git a/fanficfare/adapters/adapter_harrypotterfanfictioncom.py b/fanficfare/adapters/adapter_harrypotterfanfictioncom.py index 21e82795..5823be05 100644 --- a/fanficfare/adapters/adapter_harrypotterfanfictioncom.py +++ b/fanficfare/adapters/adapter_harrypotterfanfictioncom.py @@ -55,13 +55,6 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): url = self.url diff --git a/fanficfare/adapters/adapter_hentaifoundrycom.py b/fanficfare/adapters/adapter_hentaifoundrycom.py index df5e1584..3af1c5fc 100644 --- a/fanficfare/adapters/adapter_hentaifoundrycom.py +++ b/fanficfare/adapters/adapter_hentaifoundrycom.py @@ -57,13 +57,6 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("hentai-foundry.com/stories/user/")+r"(?P<authorId>[^/]+)/(?P<storyId>\d+)/(?P<storyURLTitle>[^/]+)" # ignore any chapter - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - def extractChapterUrlsAndMetadata(self): url = self.url logger.debug("URL: "+url) diff --git a/fanficfare/adapters/adapter_inkbunnynet.py b/fanficfare/adapters/adapter_inkbunnynet.py index c6af2b6e..ebc33ed5 100644 --- a/fanficfare/adapters/adapter_inkbunnynet.py +++ b/fanficfare/adapters/adapter_inkbunnynet.py @@ -81,13 +81,6 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter): # https://inkbunny.net/submissionview.php?id=1234567 return r'https://' + re.escape(self.getSiteDomain()) + r'/(submissionview.php\?id=|s/)(?P<id>\d+)' - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def performLogin(self,url,soup): params = { 'token':soup.find("input",{"name":"token"})['value'], diff --git a/fanficfare/adapters/adapter_lcfanficcom.py b/fanficfare/adapters/adapter_lcfanficcom.py index ba75e0b5..e92d3b7e 100644 --- a/fanficfare/adapters/adapter_lcfanficcom.py +++ b/fanficfare/adapters/adapter_lcfanficcom.py @@ -83,13 +83,6 @@ class LCFanFicComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"http://"+re.escape(self.getSiteDomain())+r"/stories/([0-9]+|_earliest)/html/*(?P<id>[^/]+)" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - #################################################################################################### ## Getting the chapter list and the meta data, plus 'is adult' checking. 
def doExtractChapterUrlsAndMetadata(self, get_cover=True): diff --git a/fanficfare/adapters/adapter_literotica.py b/fanficfare/adapters/adapter_literotica.py index dec77726..4c139661 100644 --- a/fanficfare/adapters/adapter_literotica.py +++ b/fanficfare/adapters/adapter_literotica.py @@ -118,13 +118,6 @@ class LiteroticaSiteAdapter(BaseSiteAdapter): # self.story.addToList('category', category.title()) self.story.addToList('eroticatags', category.title()) - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): """ NOTE: Some stories can have versions, diff --git a/fanficfare/adapters/adapter_lotrgficcom.py b/fanficfare/adapters/adapter_lotrgficcom.py index 19cadf97..e4262fe5 100644 --- a/fanficfare/adapters/adapter_lotrgficcom.py +++ b/fanficfare/adapters/adapter_lotrgficcom.py @@ -60,13 +60,6 @@ class LOTRgficComAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): if self.is_adult or self.getConfig("is_adult"): diff --git a/fanficfare/adapters/adapter_masseffect2in.py b/fanficfare/adapters/adapter_masseffect2in.py index 5f4700d7..5b9a5d83 100644 --- a/fanficfare/adapters/adapter_masseffect2in.py +++ b/fanficfare/adapters/adapter_masseffect2in.py @@ -88,12 +88,6 @@ class MassEffect2InAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r'https?://(?:www\.)?masseffect2.in/publ/' + self.DOCUMENT_ID_PATTERN.pattern - def use_pagecache(self): - """Allows use of downloaded page cache. 
It is essential for this - adapter, because the site does not offers chapter URL list, and many - pages have to be fetched and parsed repeatedly.""" - return True - def extractChapterUrlsAndMetadata(self): """Extracts chapter URLs and story metadata. Actually downloads all chapters, which is not exactly right, but necessary due to technical diff --git a/fanficfare/adapters/adapter_mediaminerorg.py b/fanficfare/adapters/adapter_mediaminerorg.py index 0f509f5e..c9009212 100644 --- a/fanficfare/adapters/adapter_mediaminerorg.py +++ b/fanficfare/adapters/adapter_mediaminerorg.py @@ -101,13 +101,6 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): def stripURLParameters(cls, url): return url - def use_pagecache(self): - ''' - Using pagecache with mediaminer.org caused SSL errors in - Calibre. I've no idea why, but not caching doesn't cause - it... - ''' - return False def extractChapterUrlsAndMetadata(self): diff --git a/fanficfare/adapters/adapter_novelonlinefullcom.py b/fanficfare/adapters/adapter_novelonlinefullcom.py index 774f4fbf..040b2fe7 100644 --- a/fanficfare/adapters/adapter_novelonlinefullcom.py +++ b/fanficfare/adapters/adapter_novelonlinefullcom.py @@ -95,13 +95,6 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter): # http://novelonlinefull.com/novel/stellar_transformation return r"https?://(novelonlinefull|lightnovelgate)\.com/novel/(?P<id>[^/]+)" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): # fetch the chapter. 
From that we will get almost all the # metadata and chapter list diff --git a/fanficfare/adapters/adapter_quotevcom.py b/fanficfare/adapters/adapter_quotevcom.py index 7d8fab9e..f3e7f047 100644 --- a/fanficfare/adapters/adapter_quotevcom.py +++ b/fanficfare/adapters/adapter_quotevcom.py @@ -60,9 +60,6 @@ class QuotevComAdapter(BaseSiteAdapter): pattern = pattern.replace(r'www\.', r'(www\.)?') return pattern - def use_pagecache(self): - return True - def extractChapterUrlsAndMetadata(self): data = self.get_request(self.url) diff --git a/fanficfare/adapters/adapter_royalroadcom.py b/fanficfare/adapters/adapter_royalroadcom.py index 239b9603..15f3632b 100644 --- a/fanficfare/adapters/adapter_royalroadcom.py +++ b/fanficfare/adapters/adapter_royalroadcom.py @@ -89,13 +89,6 @@ class RoyalRoadAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return "https?"+re.escape("://")+r"(www\.|)royalroadl?\.com/fiction/\d+(/.*)?$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def make_soup(self,data): soup = super(RoyalRoadAdapter, self).make_soup(data) self.handle_spoilers(soup) diff --git a/fanficfare/adapters/adapter_scribblehubcom.py b/fanficfare/adapters/adapter_scribblehubcom.py index 7293ebd7..33b0b8cd 100644 --- a/fanficfare/adapters/adapter_scribblehubcom.py +++ b/fanficfare/adapters/adapter_scribblehubcom.py @@ -90,13 +90,6 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX def getSiteURLPattern(self): return re.escape("https://"+self.getSiteDomain())+r"/(series|read)/(?P<id>\d+)[/-](?P<title>[^/]+)" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - # Set cookie to ascending order before page loads, means we know date published def set_contents_cookie(self): cookie = cl.Cookie(version=0, name='toc_sorder', value='asc', diff --git a/fanficfare/adapters/adapter_silmarillionwritersguildorg.py b/fanficfare/adapters/adapter_silmarillionwritersguildorg.py index 66c6d440..b00e5bbe 100644 --- a/fanficfare/adapters/adapter_silmarillionwritersguildorg.py +++ b/fanficfare/adapters/adapter_silmarillionwritersguildorg.py @@ -66,13 +66,6 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?://"+re.escape(self.getSiteDomain()+"/archive/home/viewstory.php?sid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - ## Getting the chapter list and the meta data def extractChapterUrlsAndMetadata(self): diff --git a/fanficfare/adapters/adapter_siyecouk.py b/fanficfare/adapters/adapter_siyecouk.py index da7bc2db..3092ec16 100644 --- a/fanficfare/adapters/adapter_siyecouk.py +++ b/fanficfare/adapters/adapter_siyecouk.py @@ -71,13 +71,6 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX def getSiteURLPattern(self): return r"https?://(www\.)?siye\.co\.uk/(siye/)?"+re.escape("viewstory.php?sid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - ## Getting the chapter list and the meta data, plus 'is adult' checking. 
def extractChapterUrlsAndMetadata(self): diff --git a/fanficfare/adapters/adapter_storiesonlinenet.py b/fanficfare/adapters/adapter_storiesonlinenet.py index 13ece8ab..07d7d4dd 100644 --- a/fanficfare/adapters/adapter_storiesonlinenet.py +++ b/fanficfare/adapters/adapter_storiesonlinenet.py @@ -149,13 +149,6 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter): username)) raise exceptions.FailedToLogin(url,username) - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - ## Getting the chapter list and the meta data, plus 'is adult' checking. def doExtractChapterUrlsAndMetadata(self, get_cover=True): diff --git a/fanficfare/adapters/adapter_swiorgru.py b/fanficfare/adapters/adapter_swiorgru.py index 2f8251b1..fe08445f 100644 --- a/fanficfare/adapters/adapter_swiorgru.py +++ b/fanficfare/adapters/adapter_swiorgru.py @@ -53,13 +53,6 @@ class SwiOrgRuAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"http://" + re.escape(self.getSiteDomain() + "/mlp-fim/story/")+r"\d+" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): url=self.url logger.debug("URL: "+url) diff --git a/fanficfare/adapters/adapter_tenhawkpresents.py b/fanficfare/adapters/adapter_tenhawkpresents.py index d4bd1ef2..d2983317 100644 --- a/fanficfare/adapters/adapter_tenhawkpresents.py +++ b/fanficfare/adapters/adapter_tenhawkpresents.py @@ -58,13 +58,6 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter): # accept https, but don't use it--site SSL is broken. return r"https?:"+re.escape("//"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - def needToLoginCheck(self, data): if 'Registered Users Only' in data \ or 'There is no such account on our website' in data \ diff --git a/fanficfare/adapters/adapter_test1.py b/fanficfare/adapters/adapter_test1.py index a410d1e3..82cc136b 100644 --- a/fanficfare/adapters/adapter_test1.py +++ b/fanficfare/adapters/adapter_test1.py @@ -52,9 +52,6 @@ class TestSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return BaseSiteAdapter.getSiteURLPattern(self)+r'/?\?sid=\d+$' - def use_pagecache(self): - return True - def extractChapterUrlsAndMetadata(self): logger.debug('extractChapterUrlsAndMetadata: %s' % self.url) idstr = self.story.getMetadata('storyId') diff --git a/fanficfare/adapters/adapter_trekfanfictionnet.py b/fanficfare/adapters/adapter_trekfanfictionnet.py index 7aace755..06b9e408 100644 --- a/fanficfare/adapters/adapter_trekfanfictionnet.py +++ b/fanficfare/adapters/adapter_trekfanfictionnet.py @@ -83,13 +83,6 @@ class TrekFanFictionNetSiteAdapter(BaseSiteAdapter): return re.escape('https://{}'.format( self.getSiteDomain()))+r'/((?P<category>[^/]+)/)?(?P<author>[^/]+)/(?P<id>[^/]+)/?$' - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def get_request(self,url): try: return super(getClass(), self).get_request(url) diff --git a/fanficfare/adapters/adapter_tthfanficorg.py b/fanficfare/adapters/adapter_tthfanficorg.py index 8d71ce87..af01c80b 100644 --- a/fanficfare/adapters/adapter_tthfanficorg.py +++ b/fanficfare/adapters/adapter_tthfanficorg.py @@ -63,13 +63,6 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return True - # tth won't send you future updates if you aren't 'caught up' # on the story. Login isn't required for F21, but logging in will # mark stories you've downloaded as 'read' on tth. diff --git a/fanficfare/adapters/adapter_wattpadcom.py b/fanficfare/adapters/adapter_wattpadcom.py index 2198112c..14e874ea 100644 --- a/fanficfare/adapters/adapter_wattpadcom.py +++ b/fanficfare/adapters/adapter_wattpadcom.py @@ -79,9 +79,6 @@ class WattpadComAdapter(BaseSiteAdapter): def getDateFormat(cls): return "%Y-%m-%dT%H:%M:%SZ" - def use_pagecache(self): - return True - def getStoryId(self, url): storyIdInUrl = re.match(r'https://www\.wattpad\.com/story/(?P<storyId>\d+).*', url) if storyIdInUrl is not None: diff --git a/fanficfare/adapters/adapter_webnovelcom.py b/fanficfare/adapters/adapter_webnovelcom.py index 219ec932..d03bf902 100644 --- a/fanficfare/adapters/adapter_webnovelcom.py +++ b/fanficfare/adapters/adapter_webnovelcom.py @@ -103,13 +103,6 @@ class WWWWebNovelComAdapter(BaseSiteAdapter): # https://www.webnovel.com/book/game-of-thrones%3A-the-prideful-one._17509790806343405 return r'https://' + re.escape(self.getSiteDomain()) + r'/book/(?P<title>.*_)?(?P<id>\d+)' - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - # Getting the chapter list and the meta data, plus 'is adult' checking. 
def doExtractChapterUrlsAndMetadata(self, get_cover=True): url = self.url diff --git a/fanficfare/adapters/adapter_whoficcom.py b/fanficfare/adapters/adapter_whoficcom.py index 7a3fd447..22506d6f 100644 --- a/fanficfare/adapters/adapter_whoficcom.py +++ b/fanficfare/adapters/adapter_whoficcom.py @@ -47,13 +47,6 @@ class WhoficComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?"+re.escape("://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): # get storyId from url--url validation guarantees query is only sid=1234 diff --git a/fanficfare/adapters/adapter_wuxiaworldco.py b/fanficfare/adapters/adapter_wuxiaworldco.py index 19532b12..2d930bb2 100644 --- a/fanficfare/adapters/adapter_wuxiaworldco.py +++ b/fanficfare/adapters/adapter_wuxiaworldco.py @@ -64,9 +64,6 @@ class WuxiaWorldCoSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r'https?://(www|m)\.wuxiaworld\.co/(?P<id>[^/]+)(/)?' - def use_pagecache(self): - return True - def extractChapterUrlsAndMetadata(self): logger.debug('URL: %s', self.url) diff --git a/fanficfare/adapters/adapter_wuxiaworldcom.py b/fanficfare/adapters/adapter_wuxiaworldcom.py index 53edf274..d0899e09 100644 --- a/fanficfare/adapters/adapter_wuxiaworldcom.py +++ b/fanficfare/adapters/adapter_wuxiaworldcom.py @@ -60,9 +60,6 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r'https?://%s/novel/(?P<id>[^/]+)(/)?' 
% re.escape(self.getSiteDomain()) - def use_pagecache(self): - return True - def _parse_linked_data(self, soup): # See https://json-ld.org tag = soup.find('script', type='application/ld+json') diff --git a/fanficfare/adapters/adapter_wuxiaworldsite.py b/fanficfare/adapters/adapter_wuxiaworldsite.py index 16c06d31..a4b07de9 100644 --- a/fanficfare/adapters/adapter_wuxiaworldsite.py +++ b/fanficfare/adapters/adapter_wuxiaworldsite.py @@ -61,9 +61,6 @@ class WuxiaWorldSiteSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r'https?://%s/novel/(?P<id>[^/]+)(/)?' % re.escape(self.getSiteDomain()) - def use_pagecache(self): - return True - def _parse_linked_data(self, soup): # See https://json-ld.org tag = soup.find('script', type='application/ld+json') diff --git a/fanficfare/adapters/adapter_wwwnovelallcom.py b/fanficfare/adapters/adapter_wwwnovelallcom.py index e032e0a5..93e41913 100644 --- a/fanficfare/adapters/adapter_wwwnovelallcom.py +++ b/fanficfare/adapters/adapter_wwwnovelallcom.py @@ -98,13 +98,6 @@ class WWWNovelAllComAdapter(BaseSiteAdapter): # https://www.novelall.com/chapter/The-Legendary-Moonlight-Sculptor-Volume-1-Chapter-1/1048282/ return r"https://www\.novelall\.com/(?P<novchap>novel|chapter)/(?P<id>[^/\.]+)(/\d+/?)?(\.html)?$" - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def extractChapterUrlsAndMetadata(self): if self.is_adult or self.getConfig("is_adult"): addurl = "?waring=1" diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py index cb707d16..e58e96ce 100644 --- a/fanficfare/adapters/base_adapter.py +++ b/fanficfare/adapters/base_adapter.py @@ -113,13 +113,6 @@ class BaseSiteAdapter(Requestable): self.getSiteDomain(), self.getSiteExampleURLs()) - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. 
- ''' - return False - def _section_url(self,url): ''' For adapters that have story URLs that can change. This is diff --git a/fanficfare/adapters/base_xenforoforum_adapter.py b/fanficfare/adapters/base_xenforoforum_adapter.py index 13dd0081..f84a94c1 100644 --- a/fanficfare/adapters/base_xenforoforum_adapter.py +++ b/fanficfare/adapters/base_xenforoforum_adapter.py @@ -185,13 +185,6 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): # logger.debug("post-url:%s"%url) return url - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - def performLogin(self,data): params = {} diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index 7a762aae..5594b777 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -540,7 +540,7 @@ class Configuration(ConfigParser): self.fetcher = None # the network layer for getting pages the # caching layer for getting pages, created now for # get_empty_pagecache() etc. - self.cache = fetcher.BaseCache() + self.cache = fetcher.BasicCache() self.opener = None # used for _filelist self.lightweight = lightweight @@ -955,7 +955,7 @@ class Configuration(ConfigParser): def get_fetcher(self): if not self.fetcher: - logger.error(self.getConfig('use_cloudscraper')) + logger.debug("use_cloudscraper:%s"%self.getConfig('use_cloudscraper')) if self.getConfig('use_cloudscraper',False): fetchcls = fetcher.CloudScraperFetcher else: @@ -973,7 +973,9 @@ class Configuration(ConfigParser): # cache decorator terminates the chain when found. Cache # created in __init__ because of get_empty_pagecache() # etc, but not used until now. 
- self.cache.decorate_fetcher(self.fetcher) + logger.debug("use_pagecache:%s"%self.getConfig('use_pagecache')) + if self.getConfig('use_pagecache'): + fetcher.BasicCacheDecorator(self.cache).decorate_fetcher(self.fetcher) if self.getConfig('progressbar'): fetcher.ProgressBarDecorator().decorate_fetcher(self.fetcher) diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 061e0768..cff18eba 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -574,6 +574,12 @@ normalize_text_links:true ## normalize_text_links will improve URL matching considerably. internalize_text_links:true +## Of the ~140 supported sites, only ~50 have been checked to work +## correctly with a page cache. The page cache is used to save already +## downloaded pages that may be requested more than once, especially in +## the Calibre plugin. +use_pagecache:false + [base_efiction] ## At the time of writing, eFiction Base adapters allow downloading @@ -588,6 +594,7 @@ storynotes_label:Story Notes add_to_extra_titlepage_entries:,storynotes [base_xenforoforum] +use_pagecache:true ## Some sites require login for some stories #username:YourName #password:yourpassword @@ -1150,7 +1157,7 @@ windows_eol: true ## URLs like: http://test1.com?sid=12345 [test1.com] - +use_pagecache:true extratags: FanFiction,Testing # extracategories:Fafner @@ -1268,6 +1275,7 @@ website_encodings:Windows-1252,utf8,iso-8859-1 website_encodings:Windows-1252,utf8 [archiveofourown.org] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1435,6 +1443,7 @@ extraships:Severus Snape/Hermione Granger website_encodings:Windows-1252,utf8 [bloodshedverse.com] +use_pagecache:true ## website encoding(s) In theory, each website reports the character ## encoding they use for each page. In practice, some sites report it ## incorrectly. 
Each adapter has a default list, usually "utf8, @@ -1511,7 +1520,11 @@ cover_exclusion_regexp:/images/.*?ribbon.gif website_encodings:Windows-1252,utf8 +[chireads.com] +use_pagecache:true + [chosentwofanfic.com] +use_pagecache:true extra_valid_entries:disclaimer disclaimer_label: Disclaimer add_to_titlepage_entries:,disclaimer @@ -1623,6 +1636,7 @@ extracategories:Harry Potter website_encodings:Windows-1252,utf8 [fanfic.tenhawkpresents.ink] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1633,6 +1647,7 @@ website_encodings:Windows-1252,utf8 website_encodings:Windows-1252,utf8 [fanficauthors.net] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1641,6 +1656,7 @@ website_encodings:Windows-1252,utf8 #password:yourpassword [fanfics.me] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1668,6 +1684,7 @@ make_linkhtml_entries:translators,betas include_in_category:fandoms [fanfictalk.com] +use_pagecache:true ## Some sites also require the user to confirm they are adult for ## adult content. In commandline version, this should go in your ## personal.ini, not defaults.ini. @@ -1736,6 +1753,9 @@ add_to_keep_html_attrs:,style [fanfiction-junkies.de] website_encodings:Windows-1252,utf8 +[fastnovel.net] +use_pagecache:true + [fiction.live] ## Recommended if you include images, fiction.live tends to have many ## duplicated images. 
@@ -1786,6 +1806,7 @@ add_to_output_css: } [fictionhunt.com] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1808,6 +1829,7 @@ make_linkhtml_entries:origin add_to_extra_titlepage_entries:originHTML [fictionmania.tv] +use_pagecache:true website_encodings:ISO-8859-1,auto ## Extra metadata that this adapter knows about. See [archiveofourown.org] @@ -1848,6 +1870,7 @@ likes_label:Likes dislikes_label:Dislikes [ficwad.com] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -1947,6 +1970,7 @@ reader_posts_per_page:30 #password:yourpassword [harrypotterfanfiction.com] +use_pagecache:true extra_valid_entries:reviews,era ## Site dedicated to these categories/characters/ships @@ -1980,6 +2004,7 @@ website_encodings:Windows-1252,utf8 website_encodings:Windows-1252,utf8 [inkbunny.net] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -2015,10 +2040,12 @@ extra_titlepage_entries:universe,crossoverfandom website_encodings:Windows-1252,utf8 [lcfanfic.com] +use_pagecache:true ## Site dedicated to these categories/characters/ships extracategories:Lois & Clark: The New Adventures of Superman [literotica.com] +use_pagecache:true user_agent: extra_valid_entries:eroticatags,averrating eroticatags_label:Erotica Tags @@ -2103,6 +2130,7 @@ extracharacters:Carol,Daryl extraships:Carol/Daryl [novelonlinefull.com] +use_pagecache:true website_encodings: utf8:ignore, Windows-1252, iso-8859-1 ## Clear FanFiction from defaults, site is original fiction. 
@@ -2155,6 +2183,7 @@ extracategories:The Pretender website_encodings:Windows-1252,utf8 [quotev.com] +use_pagecache:true user_agent:Mozilla/5.0 slow_down_sleep_time:2 extra_valid_entries:pages,readers,reads,favorites,searchtags,comments @@ -2446,6 +2475,7 @@ slow_down_sleep_time:2 #password:yourpassword [storiesonline.net] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -2576,6 +2606,7 @@ readings_label: Readings website_encodings:Windows-1252,utf8 [trekfanfiction.net] +use_pagecache:true website_encodings:utf8,Windows-1252,iso-8859-1 [trekiverse.org] @@ -2613,6 +2644,7 @@ reviews_label:Reviews readings_label:Readings [wattpad.com] +use_pagecache:true #is_adult:true extra_titlepage_entries: language, reads extra_valid_entries: language, tags, reads @@ -2626,7 +2658,11 @@ add_to_comma_entries:,reads ## are reports that sound like site throttling without it. slow_down_sleep_time:2 +[wuxiaworld.site] +use_pagecache:true + [www.adastrafanfic.com] +use_pagecache:true ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. @@ -2675,6 +2711,7 @@ website_encodings:Windows-1252,utf8 strip_text_links:true [www.asianfanfics.com] +use_pagecache:true ## Unlike most sites, asianfanfics.com, instead of denying access to ## 'adult' or subscriber-only content, will censor the text of stories ## to remove 'adult' words or entire portions of the text. This is why @@ -2715,6 +2752,7 @@ include_in_category:tags auto_sub:false [www.bdsmlibrary.com] +use_pagecache:true ## Some sites also require the user to confirm they are adult for ## adult content. Uncomment by removing '#' in front of is_adult. 
#is_adult:true @@ -2795,6 +2833,7 @@ type_label:Type of Couple website_encodings:Windows-1252,utf8 [www.fanfiction.net] +use_pagecache:true ## Using cloudscraper can satisfy the first couple levels of ## Cloudflare bot-proofing, but not all levels. Older versions of ## OpenSSL will also raise problems, so versions of Calibre older than @@ -2850,6 +2889,7 @@ dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S check_next_chapter:false [www.fanfiktion.de] +use_pagecache:true ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not @@ -2862,6 +2902,7 @@ check_next_chapter:false extra_valid_entries:native_status [www.ficbook.net] +use_pagecache:true ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. @@ -2933,6 +2974,7 @@ datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S [www.fimfiction.net] +use_pagecache:true ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. @@ -3039,6 +3081,7 @@ add_to_titlepage_entries:,growth, shrink, sizeroles #password:yourpassword [www.hentai-foundry.com] +use_pagecache:true ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. 
@@ -3125,6 +3168,7 @@ extracategories:Lord of the Rings #password:yourpassword [www.lotrgfic.com] +use_pagecache:true extra_valid_entries:places, times places_label: Places times_label:Times @@ -3152,6 +3196,7 @@ eroticatags_label:Erotica Tags extra_titlepage_entries:eroticatags [www.masseffect2.in] +use_pagecache:true ## Site dedicated to this fandom. extracategories: Mass Effect @@ -3178,6 +3223,9 @@ adult_ratings: E,R website_encodings:utf8 [www.mediaminer.org] +## Using pagecache with mediaminer.org caused SSL errors in Calibre. +## I've no idea why, but turning off explicitly. +use_pagecache:false dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S ## Note that mediaminer doesn't give datePublished on the story's ## index page--it's collected from the earliest uploaded chapter. So @@ -3243,6 +3291,7 @@ extracategories:Naruto extracategories:NCIS [www.novelall.com] +use_pagecache:true website_encodings: utf8:ignore, Windows-1252, iso-8859-1 ## Clear FanFiction from defaults, site is original fiction. @@ -3328,6 +3377,7 @@ extracategories:Queer as Folk website_encodings:Windows-1252,utf8 [www.royalroad.com] +use_pagecache:true extra_valid_entries:stars #add_to_extra_titlepage_entries:,stars @@ -3377,6 +3427,7 @@ extracharacters:Kurt Hummel,Blaine Anderson website_encodings:Windows-1252,utf8 [www.scribblehub.com] +use_pagecache:true extra_valid_entries:views, averageWords views_label:Views averageWords_label:Average Words (Chapter) @@ -3388,7 +3439,11 @@ add_to_titlepage_entries:,views, averageWords ## personal.ini and list the ones you don't want. 
#exclude_notes:authornotes,newsboxes +[www.silmarillionwritersguild.org] +use_pagecache:true + [www.siye.co.uk] +use_pagecache:true ## Site dedicated to these categories/characters/ships extracategories:Harry Potter extracharacters:Harry Potter,Ginny Weasley @@ -3428,6 +3483,9 @@ extracategories:Lord of the Rings website_encodings:Windows-1252,utf8 +[www.swi.org.ru] +use_pagecache:true + [www.the-sietch.com] ## see [base_xenforoforum] @@ -3469,6 +3527,7 @@ extracategories:Star Trek: Voyager #password:yourpassword [www.tthfanfic.org] +use_pagecache:true user_agent: slow_down_sleep_time:2 ## Some sites do not require a login, but do require the user to @@ -3564,6 +3623,7 @@ extraships:Severus Snape/Harry Potter website_encodings:Windows-1252,utf8 [www.webnovel.com] +use_pagecache:true ## Extra metadata that this adapter knows about. See [archiveofourown.org] ## for examples of how to use them. extra_valid_entries:translator, editor, sitetags @@ -3586,6 +3646,7 @@ extra_titlepage_entries: translator, editor, sitetags fix_pseudo_html:false [www.whofic.com] +use_pagecache:true website_encodings:Windows-1252,utf8 [www.wolverineandrogue.com] @@ -3611,12 +3672,14 @@ extracategories:Stargate: Atlantis website_encodings:Windows-1252,utf8 [www.wuxiaworld.co] +use_pagecache:true ## Note that wuxiaworld.co != wuxiaworld.com ## When dedup_order_chapter_list:true, use a heuristic algorithm ## specific to wuxiaworld.co order and dedup chapters. 
dedup_order_chapter_list:false [www.wuxiaworld.com] +use_pagecache:true user_agent:Mozilla/5.0 ## Authors on wuxiaworld.com create their own index pages, so it's not ## uncommon for there to be links to chapters that haven't been diff --git a/fanficfare/fetcher.py b/fanficfare/fetcher.py index c732154a..78488eb5 100644 --- a/fanficfare/fetcher.py +++ b/fanficfare/fetcher.py @@ -30,6 +30,7 @@ import logging import sys import pickle from functools import partial +import threading from urllib3.util.retry import Retry import requests @@ -164,9 +165,9 @@ class SleepDecorator(FetcherDecorator): return fetchresp -class BaseCache(FetcherDecorator): +class BasicCache(object): def __init__(self): - super(BaseCache,self).__init__() + self.cache_lock = threading.RLock() self.pagecache = self.get_empty_pagecache() self.save_cache_file = None @@ -174,34 +175,42 @@ class BaseCache(FetcherDecorator): return {} def get_pagecache(self): - return self.pagecache + with self.cache_lock: + return self.pagecache def set_pagecache(self,d,save_cache_file=None): - self.save_cache_file = save_cache_file - self.pagecache=d + with self.cache_lock: + self.save_cache_file = save_cache_file + self.pagecache=d def make_cachekey(self, url, parameters=None): - keylist=[url] - if parameters != None: - keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items()))) - return unicode('?'.join(keylist)) + with self.cache_lock: + keylist=[url] + if parameters != None: + keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items()))) + return unicode('?'.join(keylist)) def has_cachekey(self,cachekey): - return self.use_pagecache and cachekey in self.get_pagecache() + with self.cache_lock: + return cachekey in self.get_pagecache() def get_from_cache(self,cachekey): - if self.use_pagecache: - return self.get_pagecache().get(cachekey) - else: - return None + with self.cache_lock: + return self.get_pagecache().get(cachekey,None) def 
set_to_cache(self,cachekey,data,redirectedurl): - if self.use_pagecache: + with self.cache_lock: self.get_pagecache()[cachekey] = (data,ensure_text(redirectedurl)) if self.save_cache_file: with open(self.save_cache_file,'wb') as jout: pickle.dump(self.get_pagecache(),jout,protocol=2) + +class BasicCacheDecorator(FetcherDecorator): + def __init__(self,cache): + super(BasicCacheDecorator,self).__init__() + self.cache = cache + def fetcher_do_request(self, fetcher, chainfn, @@ -216,12 +225,12 @@ class BaseCache(FetcherDecorator): Note that usecache=False prevents lookup, but cache still saves result ''' - logger.debug("BaseCache fetcher_do_request") - cachekey=self.make_cachekey(url, parameters) + logger.debug("BasicCacheDecorator fetcher_do_request") + cachekey=self.cache.make_cachekey(url, parameters) - if usecache and self.has_cachekey(cachekey) and not cachekey.startswith('file:'): + if usecache and self.cache.has_cachekey(cachekey) and not cachekey.startswith('file:'): logger.debug("#####################################\npagecache(%s) HIT: %s"%(method,safe_url(cachekey))) - data,redirecturl = self.get_from_cache(cachekey) + data,redirecturl = self.cache.get_from_cache(cachekey) return FetcherResponse(data,redirecturl=redirecturl,fromcache=True) logger.debug("#####################################\npagecache(%s) MISS: %s"%(method,safe_url(cachekey))) @@ -241,9 +250,9 @@ class BaseCache(FetcherDecorator): ## saved-cache and wondering why file changes aren't showing ## up. if not fetchresp.fromcache: - self.set_to_cache(cachekey,data,fetchresp.redirecturl) + self.cache.set_to_cache(cachekey,data,fetchresp.redirecturl) if url != fetchresp.redirecturl: # cache both? 
- self.set_to_cache(cachekey,data,url) + self.cache.set_to_cache(cachekey,data,url) return fetchresp class FetcherResponse(object): diff --git a/fanficfare/requestable.py b/fanficfare/requestable.py index 4bb40ad8..f7b7be5a 100644 --- a/fanficfare/requestable.py +++ b/fanficfare/requestable.py @@ -26,11 +26,6 @@ class Requestable(Configurable): def __init__(self, configuration): Configurable.__init__(self,configuration) - ## use_pagecache() is on adapters--not all have been updated - ## to deal with caching correctly - if hasattr(self, 'use_pagecache'): - self.configuration.cache.use_pagecache = self.use_pagecache() - ## website encoding(s)--in theory, each website reports the character ## encoding they use for each page. In practice, some sites report it ## incorrectly. Each adapter has a default list, usually "utf8,