ffnet only: try_shortened_title_urls option #1166

This commit is contained in:
Jim Miller 2025-03-18 20:49:46 -05:00
parent 08ccc659ca
commit a973b8c926
2 changed files with 27 additions and 0 deletions

View file

@ -110,6 +110,31 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
r"https://www.\g<keep>",url)+self.urltitle
def get_request(self,url):
    """
    Fetch url, optionally retrying chapter URLs with a shortened title.

    When the try_shortened_title_urls option is not set, or url is not
    a www.fanfiction.net chapter URL ending in a title segment, this
    simply defers to the parent class's get_request().

    Otherwise the full URL is tried first.  Each time the request
    fails with an HTTPErrorFFF whose text contains 'Page not found or
    expired', the URL is retried with one more trailing character
    removed.  At most min(4, len(title)) variants are tried, so at
    least one character of the title always remains.

    Returns whatever the parent get_request() returns for the first
    variant that succeeds.

    Raises exceptions.HTTPErrorFFF when the failure is not a 'Page not
    found or expired' error, or when every variant has failed (the
    last failure is re-raised rather than silently returning None).
    """
    ## use super version if not set or isn't a chapter URL with a
    ## title.
    if( not self.getConfig("try_shortened_title_urls") or
        not re.match(r"https?://www\.fanfiction\.net/s/\d+/\d+/(?P<title>[^/]+)$", url) ):
        return super(getClass(), self).get_request(url)

    ## kludgey way to attempt more than one URL variant by
    ## removing title one letter at a time.  Note that network and
    ## open_pages_in_browser retries still happen first.
    titlelen = len(url.split('/')[-1])
    maxcut = min([4,titlelen])
    for cut in range(maxcut):
        ## cut==0, full URL, then remove letters.
        useurl = url[:-cut] if cut else url
        try:
            return super(getClass(), self).get_request(useurl)
        except exceptions.HTTPErrorFFF as fffe:
            ## Re-raise on the final variant so the loop can only be
            ## left by return or by raise -- never by falling off the
            ## end and implicitly returning None.
            if cut + 1 >= maxcut or 'Page not found or expired' not in unicode(fffe):
                raise
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
# fetch the chapter. From that we will get almost all the

View file

@ -235,6 +235,7 @@ def get_valid_set_options():
'meta_from_last_chapter':(ffnet_list,None,boollist),
'tweak_fg_sleep':(None,None,boollist),
'skip_author_cover':(ffnet_list,None,boollist),
'try_shortened_title_urls':(['fanfiction.net'],None,boollist),
'fix_fimf_blockquotes':(['fimfiction.net'],None,boollist),
'keep_prequel_in_description':(['fimfiction.net'],None,boollist),
@ -394,6 +395,7 @@ def get_valid_keywords():
'check_next_chapter',
'meta_from_last_chapter',
'skip_author_cover',
'try_shortened_title_urls',
'collect_series',
'comma_entries',
'connect_timeout',