adapter_adultfanfictionorg: Fixes for site changes #1305

2026-05-08 12:36:11 +02:00 · 2026-02-01 12:57:40 -06:00 · 2026-02-01 12:57:40 -06:00 · 9b96c151a5
commit 9b96c151a5
parent 1b65a30798
3 changed files with 56 additions and 199 deletions
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@ -1604,13 +1604,6 @@ eroticatags_label:Erotica Tags
 disclaimer_label:Disclaimer
 extra_titlepage_entries:eroticatags,disclaimer

-## Some sites require login (or login for some rated stories) The
-## program can prompt you, or you can save it in config.  In
-## commandline version, this should go in your personal.ini, not
-## defaults.ini.
-#username:YourName
-#password:yourpassword
-
 [althistory.com]
 ## Note this is NOT the same as www.alternatehistory.com
 ## see [base_xenforoforum]
--- a/fanficfare/adapters/adapter_adultfanfictionorg.py
+++ b/fanficfare/adapters/adapter_adultfanfictionorg.py
@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-        self.dateformat = "%Y-%m-%d"
-
-
+        self.dateformat = "%B %d, %Y"

    ## Added because adult-fanfiction.org does send you to
    ## www.adult-fanfiction.org when you go to it and it also moves
@ -139,91 +137,45 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'

-    ##This is not working right now, so I'm commenting it out, but leaving it for future testing
-    ## Login seems to be reasonably standard across eFiction sites.
-    #def needToLoginCheck(self, data):
-        ##This adapter will always require a login
-    #    return True
-
-#    <form name="login" method="post" action="">
-#      <div class="top">E-mail: <span id="sprytextfield1">
-#        <input name="email" type="text" id="email" size="20" maxlength="255" />
-#        <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
-#      <div class="top">Password: <span id="sprytextfield2">
-#        <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
-#        <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
-#      <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
-#        <input type="submit" value="Login" />
-#      </div>
-#    </form>
-
-
-    ##This is not working right now, so I'm commenting it out, but leaving it for future testing
-    #def performLogin(self, url, soup):
-    #    params = {}
-
-    #    if self.password:
-    #        params['email'] = self.username
-    #        params['pass1'] = self.password
-    #    else:
-    #        params['email'] = self.getConfig("username")
-    #        params['pass1'] = self.getConfig("password")
-    #    params['submit'] = 'Login'
-
-    #    # copy all hidden input tags to pick up appropriate tokens.
-    #    for tag in soup.find_all('input',{'type':'hidden'}):
-    #        params[tag['name']] = tag['value']
-
-    #    logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
-
-    #    d = self.post_request(url, params, usecache=False)
-    #    d = self.post_request(url, params, usecache=False)
-    #    soup = self.make_soup(d)
-
-        #if not (soup.find('form', {'name' : 'login'}) == None):
-        #    logger.info("Failed to login to URL %s as %s" % (url, params['email']))
-        #    raise exceptions.FailedToLogin(url,params['email'])
-        #    return False
-        #else:
-    #    return True
-
    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def doExtractChapterUrlsAndMetadata(self, get_cover=True):

        ## You need to have your is_adult set to true to get this story
        if not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)
+        else:
+            d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
+            if "Age verified successfully" not in d:
+                raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))

        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)
+        # logger.debug(data)

        if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
            raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))

        soup = self.make_soup(data)

-        ##This is not working right now, so I'm commenting it out, but leaving it for future testing
-        #self.performLogin(url, soup)
-
-
        ## Title
        ## Some of the titles have a backslash on the story page, but not on the Author's page
        ## So I am removing it from the title, so it can be found on the Author's page further in the code.
        ## Also, some titles may have extra spaces '  ', and the search on the Author's page removes them,
        ## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
-        a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
-        self.story.setMetadata('title',stripHTML(a).replace('\\','').replace('  ',' ').replace('  ',' ').replace('  ',' ').strip())
+        h1 = soup.find('h1')
+        # logger.debug("Title:%s"%h1)
+        self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace('  ',' ').replace('  ',' ').replace('  ',' ').strip())

-        # Find the chapters:
-        chapters = soup.find('ul',{'class':'dropdown-content'})
-        for i, chapter in enumerate(chapters.find_all('a')):
-            self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
+        # Find the chapters from first list only
+        chapters = soup.select_one('select.chapter-select').select('option')
+        for chapter in chapters:
+            self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])


        # Find authorid and URL from... author url.
-        a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
+        a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
        if a == None:
            # I know that the original author of fanficfare wants to always have metadata,
            # but I posit that if the story is there, even if we can't get the metadata from the
@ -232,140 +184,56 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
            self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
            self.story.setMetadata('author','Unknown')
            logger.warning('There was no author found for the story... Metadata will not be retreived.')
-            self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
+            self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
        else:
            self.story.setMetadata('authorId',a['href'].split('=')[1])
            self.story.setMetadata('authorUrl',a['href'])
            self.story.setMetadata('author',stripHTML(a))

-            ##The story page does not give much Metadata, so we go to the Author's page
+            ## The story page does not give much Metadata, so we go to
+            ## the Author's page.  Except it's actually a sub-req for
+            ## list of author's stories for that subdomain
+            author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
+                self.getBaseDomain(),
+                self.zone,
+                self.story.getMetadata('authorId'))

-            ##Get the first Author page to see if there are multiple pages.
-            ##AFF doesn't care if the page number is larger than the actual pages,
-            ##it will continue to show the last page even if the variable is larger than the actual page
-            author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
-            #author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'
-
-            ##I'm resetting the author page to the zone for this story
-            self.story.setMetadata('authorUrl',author_Url)
-
-            logger.debug('Getting the author page: {0}'.format(author_Url))
+            logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
            adata = self.get_request(author_Url)

-            if "The member you are looking for does not exist." in adata:
-                raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
-                #raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")
+            none_found = "No stories found in this category."
+            if none_found in adata:
+                raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))

            asoup = self.make_soup(adata)
+            # logger.debug(asoup)

-            ##Getting the number of author pages
-            pages = 0
-            pagination=asoup.find('ul',{'class' : 'pagination'})
-            if pagination:
-                pages = pagination.find_all('li')[-1].find('a')
-                if not pages == None:
-                    pages = pages['href'].split('=')[-1]
-                else:
-                    pages = 0
+            story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
+            # logger.debug(story_card)

-            storya = None
-            ##If there is only 1 page of stories, check it to get the Metadata,
-            if pages == 0:
-                a = asoup.find_all('li')
-                for lc2 in a:
-                    if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
-                        storya = lc2
-                        break
-            ## otherwise go through the pages
-            else:
-                page=1
-                i=0
-                while i == 0:
-                    ##We already have the first page, so if this is the first time through, skip getting the page
-                    if page != 1:
-                        author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
-                        logger.debug('Getting the author page: {0}'.format(author_Url))
-                        adata = self.get_request(author_Url)
-                        ##This will probably never be needed, since AFF doesn't seem to care what number you put as
-                        ## the page number, it will default to the last page, even if you use 1000, for an author
-                        ## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
-                        if "The member you are looking for does not exist." in adata:
-                            raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
-                    # we look for the li element that has the story here
-                    asoup = self.make_soup(adata)
+            ## Category
+            ## I've only seen one category per story so far, but just in case:
+            for cat in story_card.select('div.story-card-category'):
+                # remove Category:, old code suggests Located: is also
+                # possible, so removing by <strong>
+                cat.find("strong").decompose()
+                self.story.addToList('category',stripHTML(cat))

-                    a = asoup.find_all('li')
-                    for lc2 in a:
-                        if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
-                            i=1
-                            storya = lc2
-                            break
-                    page = page + 1
-                    if page > int(pages):
-                        break
+            self.setDescription(url,story_card.select_one('div.story-card-description'))

-            ##Split the Metadata up into a list
-            ##We have to change the soup type to a string, then remove the newlines, and double spaces,
-            ##then changes the <br/> to '-:-', which seperates the different elemeents.
-            ##Then we strip the HTML elements from the string.
-            ##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
-            ##They are always in the same order.
-            ## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
-            liMetadata = unicode(storya).replace('\n','').replace('\r','').replace('\t',' ').replace('  ',' ').replace('  ',' ').replace('  ',' ')
-            liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
-            liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
-            for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
-                if i == 0:
-                    # The value for the title has been manipulated, so may not be the same as gotten at the start.
-                    # I'm going to use the href from the storya retrieved from the author's page to determine if it is correct.
-                    if storya.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
-                        raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
-                elif i == 1:
-                    ##Get the description
-                    self.setDescription(url,stripHTML(value.strip()))
-                else:
-                    # the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
-                    if 'Located :' in value:
-                        self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
-                    elif 'Category :' in value:
-                        # Get the Category
-                        self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
-                    elif 'Content Tags :' in value:
-                        # Get the Erotic Tags
-                        value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
-                        for code in re.split(r'\s',value):
-                            self.story.addToList('eroticatags',code)
-                    elif 'Posted :' in value:
-                        # Get the Posted Date
-                        value = value.replace(r'Posted :',r'').strip()
-                        if value.startswith('008'):
-                            # It is unknown how the 200 became 008, but I'm going to change it back here
-                            value = value.replace('008','200')
-                        elif value.startswith('0000'):
-                            # Since the date is showing as 0000,
-                            # I'm going to put the memberdate here
-                            value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
-                        self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
-                    elif 'Edited :' in value:
-                        # Get the 'Updated' Edited date
-                        # AFF has the time for the Updated date, and we only want the date,
-                        # so we take the first 10 characters only
-                        value = value.replace(r'Edited :',r'').strip()[0:10]
-                        if value.startswith('008'):
-                            # It is unknown how the 200 became 008, but I'm going to change it back here
-                            value = value.replace('008','200')
-                            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
-                        elif value.startswith('0000') or '-00-' in value:
-                            # Since the date is showing as 0000,
-                            # or there is -00- in the date,
-                            # I'm going to put the Published date here
-                            self.story.setMetadata('dateUpdated', self.story.getMetadataRaw('datePublished'))
-                        else:
-                            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
-                    else:
-                        # This catches the blank elements, and the Review and Dragon Prints.
-                        # I am not interested in these, so do nothing
-                        zzzzzzz=0
+            for tag in story_card.select('span.story-tag'):
+                self.story.addToList('eroticatags',stripHTML(tag))
+
+            ## created/updates share formatting
+            for meta in story_card.select('div.story-card-meta-item span:last-child'):
+                meta = stripHTML(meta)
+                if 'Created: ' in meta:
+                    meta = meta.replace('Created: ','')
+                    self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))
+
+                if 'Updated: ' in meta:
+                    meta = meta.replace('Updated: ','')
+                    self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))

    # grab the text for an individual chapter.
    def getChapterText(self, url):
@ -373,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))
-        chaptertag = soup.find('ul',{'class':'pagination'}).parent.parent.parent.findNextSibling('li')
+        chaptertag = soup.select_one('div.chapter-body')
        if None == chaptertag:
            raise exceptions.FailedToDownload("Error downloading Chapter: {0}!  Missing required element!".format(url))
-        # Change td to a div.
-        chaptertag.name='div'
+        ## chapter text includes a copy of story title, author,
+        ## chapter title, & eroticatags specific to the chapter.  Did
+        ## before, too.

        return self.utf8FromSoup(url,chaptertag)
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@ -1592,18 +1592,13 @@ chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kinti


 [adult-fanfiction.org]
+use_basic_cache:true
+
 extra_valid_entries:eroticatags,disclaimer
 eroticatags_label:Erotica Tags
 disclaimer_label:Disclaimer
 extra_titlepage_entries:eroticatags,disclaimer

-## Some sites require login (or login for some rated stories) The
-## program can prompt you, or you can save it in config.  In
-## commandline version, this should go in your personal.ini, not
-## defaults.ini.
-#username:YourName
-#password:yourpassword
-
 [althistory.com]
 ## Note this is NOT the same as www.alternatehistory.com
 ## see [base_xenforoforum]