From a973b8c9264c1b061179e666e717a36d9eb8c0e6 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Tue, 18 Mar 2025 20:49:46 -0500
Subject: [PATCH] ffnet only: try_shortened_title_urls option #1166

---
NOTE(review): this copy of the patch had its line breaks collapsed and
its <...> spans stripped; layout, indentation and the regex group names
(<keep>, <title>) are reconstructed -- confirm against upstream before
applying. The retry loop in get_request now re-raises when the last
shortened URL also fails; before, the loop ended and None was returned.

 fanficfare/adapters/adapter_fanfictionnet.py | 25 ++++++++++++++++++++
 fanficfare/configurable.py                   |  2 ++
 2 files changed, 27 insertions(+)

diff --git a/fanficfare/adapters/adapter_fanfictionnet.py b/fanficfare/adapters/adapter_fanfictionnet.py
index 6e441258..aeb5fdac 100644
--- a/fanficfare/adapters/adapter_fanfictionnet.py
+++ b/fanficfare/adapters/adapter_fanfictionnet.py
@@ -110,6 +110,31 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
         return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
                       r"https://www.\g<keep>",url)+self.urltitle
 
+    def get_request(self,url):
+        ## use super version if not set or isn't a chapter URL with a
+        ## title.
+        if( not self.getConfig("try_shortened_title_urls") or
+            not re.match(r"https?://www\.fanfiction\.net/s/\d+/\d+/(?P<title>[^/]+)$", url) ):
+            return super(getClass(), self).get_request(url)
+
+        ## kludgey way to attempt more than one URL variant by
+        ## removing title one letter at a time. Note that network and
+        ## open_pages_in_browser retries still happen first.
+        titlelen = len(url.split('/')[-1])
+        maxcut = min([4,titlelen])
+        j = 0
+        while j < maxcut: # always leaves loop by return or raise;
+                          # the last variant re-raises its failure.
+            try:
+                useurl = url
+                if j: # j==0, full URL, then remove letters.
+                    useurl = url[:-j]
+                return super(getClass(), self).get_request(useurl)
+            except exceptions.HTTPErrorFFF as fffe:
+                if j >= maxcut-1 or 'Page not found or expired' not in unicode(fffe):
+                    raise
+                j = j+1
+
     def doExtractChapterUrlsAndMetadata(self,get_cover=True):
 
         # fetch the chapter. From that we will get almost all the
diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py
index ca764c81..0ae2c033 100644
--- a/fanficfare/configurable.py
+++ b/fanficfare/configurable.py
@@ -235,6 +235,7 @@ def get_valid_set_options():
                'meta_from_last_chapter':(ffnet_list,None,boollist),
                'tweak_fg_sleep':(None,None,boollist),
                'skip_author_cover':(ffnet_list,None,boollist),
+               'try_shortened_title_urls':(['fanfiction.net'],None,boollist),
 
                'fix_fimf_blockquotes':(['fimfiction.net'],None,boollist),
                'keep_prequel_in_description':(['fimfiction.net'],None,boollist),
@@ -394,6 +395,7 @@ def get_valid_keywords():
                  'check_next_chapter',
                  'meta_from_last_chapter',
                  'skip_author_cover',
+                 'try_shortened_title_urls',
                  'collect_series',
                  'comma_entries',
                  'connect_timeout',