adapter_fanfictionnet: Start keeping story title part of storyUrl.

This commit is contained in:
Jim Miller 2021-01-20 12:27:42 -06:00
parent 48b8730571
commit 6965a04403
2 changed files with 39 additions and 17 deletions

View file

@ -1082,6 +1082,14 @@ class FanFicFarePlugin(InterfaceAction):
# http, plus many sites are now switching to https.
regexp = r'identifiers:"~ur(i|l):~^https?%s$"'%(re.sub(r'^https?','',re.escape(url)))
# logger.debug(regexp)
## Added Jan 2021, adapter_fanfictionnet is keeping title in
## URL now, search with and without url title. 'URL changed'
## check will still trigger if existing URL has a *different*
## url title.
if "\.fanfiction\.net" in regexp:
regexp = re.sub(r"^(?P<keep>.*net/s/\d+/\d+/)(?P<urltitle>[^\$]*)?",
r"\g<keep>(\g<urltitle>)?",regexp)
# logger.debug(regexp)
return self.gui.current_db.search_getting_ids(regexp,None,use_virtual_library=False)
def prep_downloads(self, options, books, merge=False, extrapayload=None):

View file

@ -24,6 +24,7 @@ import re
# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError
from ..six.moves.urllib.parse import urlparse
from ..chromagnon.cacheParse import ChromeCache
@ -42,20 +43,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
BaseSiteAdapter.__init__(self, config, url)
self.story.setMetadata('siteabbrev','ffnet')
# get storyId from url--url validation guarantees second part is storyId
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
self.set_story_idurl(url)
# normalized story URL.
self._setURL("https://"+self.getSiteDomain()\
+"/s/"+self.story.getMetadata('storyId')+"/1/")
# ffnet update emails have the latest chapter URL.
# Frequently, when they arrive, not all the servers have the
# latest chapter yet and going back to chapter 1 to pull the
# chapter list doesn't get the latest. So save and use the
# original URL given to pull chapter list & metadata.
# Not used by plugin because URL gets normalized first for
# eliminating duplicate story urls.
self.origurl = url
if "https://m." in self.origurl:
## accept m(mobile)url, but use www.
@ -74,6 +63,15 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
def getSiteExampleURLs(cls):
    """Return a space-separated sample of accepted story URLs.

    Covers chapter 1, a later chapter, a URL carrying the url title,
    and the mobile (m.) host form.
    """
    examples = ("https://www.fanfiction.net/s/1234/1/ "
                "https://www.fanfiction.net/s/1234/12/ "
                "http://www.fanfiction.net/s/1234/1/Story_Title "
                "http://m.fanfiction.net/s/1234/1/")
    return examples
def set_story_idurl(self,url):
    """Extract storyId and optional url title from *url*, then normalize the story URL.

    Stores the storyId in metadata, remembers the url-title path segment
    (possibly empty) in self.urltitle, and sets the normalized story URL
    pointing at chapter 1 with the url title appended.
    """
    path_segments = urlparse(url).path.split('/')
    # URL validation elsewhere guarantees segment 2 is the storyId.
    self.story.setMetadata('storyId', path_segments[2])
    if len(path_segments) >= 5:
        self.urltitle = path_segments[4]
    else:
        self.urltitle = ''
    # normalized story URL keeps the (possibly empty) url title.
    self._setURL("https://" + self.getSiteDomain()
                 + "/s/" + self.story.getMetadata('storyId')
                 + "/1/" + self.urltitle)
def getSiteURLPattern(self):
    """Return the regexp matching acceptable story URLs for this site.

    Accepts http/https, www. or m. host, /s/<storyId>, an optional
    chapter number, an optional url-title segment, and an optional
    trailing slash.
    """
    # NOTE(review): the host group is optional but the following dot is
    # not, so a bare fanfiction.net host would not match — presumably
    # intentional (site always serves www./m.); confirm before changing.
    pattern = r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
    return pattern
@ -136,6 +134,13 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
'''
return True
## not actually putting urltitle on multi-chapters below, but
## one-shots will have it, so this is still useful. normalized
## chapter URLs do NOT contain the story title.
def normalize_chapterurl(self,url):
    """Canonicalize a chapter URL: https://www. host, url title stripped."""
    chapter_pat = r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*"
    www_repl = r"https://www.\g<keep>"
    # URLs that don't look like chapter URLs pass through unchanged
    # (re.sub leaves non-matching input as-is).
    return re.sub(chapter_pat, www_repl, url)
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
get_cover=False
# fetch the chapter. From that we will get almost all the
@ -165,6 +170,10 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
if "Please check to see you are not using an outdated url." in data:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url)
# <link rel="canonical" href="//www.fanfiction.net/s/13551154/100/Haze-Gray">
canonicalurl = soup.select_one('link[rel=canonical]')['href']
self.set_story_idurl(canonicalurl)
if self.getConfig('check_next_chapter'):
try:
## ffnet used to have a tendency to send out update
@ -177,9 +186,10 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# get chapter part of url.
except:
chapcount = 1
tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
self.story.getMetadata('storyId'),
chapcount+1)
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
self.story.getMetadata('storyId'),
chapcount+1,
self.urltitle)
logger.debug('=Trying newer chapter: %s' % tryurl)
newdata = self._fetchUrl(tryurl)
if "not found. Please check to see you are not using an outdated url." not in newdata \
@ -409,7 +419,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## ffnet(and, I assume, fpcom) tends to fail more if hit too
## fast. This is in addition to whatever the
## slow_down_sleep_time setting is.
data = self._fetchUrl(url,extrasleep=4.0)
## AND explicitly put title URL back on chapter URL for fetch
## *only*--normalized chapter URL does NOT have urltitle
data = self._fetchUrl(url+self.urltitle,
extrasleep=4.0)
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)