From 9b96c151a5033b983765b5bcb996a2062cf695d2 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 1 Feb 2026 12:57:40 -0600 Subject: [PATCH] adapter_adultfanfictionorg: Fixes for site changes #1305 --- calibre-plugin/plugin-defaults.ini | 7 - .../adapters/adapter_adultfanfictionorg.py | 239 ++++-------------- fanficfare/defaults.ini | 9 +- 3 files changed, 56 insertions(+), 199 deletions(-) diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 986401c0..5f2152ed 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -1604,13 +1604,6 @@ eroticatags_label:Erotica Tags disclaimer_label:Disclaimer extra_titlepage_entries:eroticatags,disclaimer -## Some sites require login (or login for some rated stories) The -## program can prompt you, or you can save it in config. In -## commandline version, this should go in your personal.ini, not -## defaults.ini. -#username:YourName -#password:yourpassword - [althistory.com] ## Note this is NOT the same as www.alternatehistory.com ## see [base_xenforoforum] diff --git a/fanficfare/adapters/adapter_adultfanfictionorg.py b/fanficfare/adapters/adapter_adultfanfictionorg.py index f9f5f539..81b20c39 100644 --- a/fanficfare/adapters/adapter_adultfanfictionorg.py +++ b/fanficfare/adapters/adapter_adultfanfictionorg.py @@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter): # The date format will vary from site to site. # http://docs.python.org/library/datetime.html#strftime-strptime-behavior - self.dateformat = "%Y-%m-%d" - - + self.dateformat = "%B %d, %Y" ## Added because adult-fanfiction.org does send you to ## www.adult-fanfiction.org when you go to it and it also moves @@ -139,91 +137,45 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$' - ##This is not working right now, so I'm commenting it out, but leaving it for future testing - ## Login seems to be reasonably standard across eFiction sites. - #def needToLoginCheck(self, data): - ##This adapter will always require a login - # return True - -#
-#
E-mail: -# -# Email is required.Invalid E-mail.
-#
Password: -# -# password is required.Minimum 8 characters8.Exceeded 32 characters.
-#

-# -#
-#
- - - ##This is not working right now, so I'm commenting it out, but leaving it for future testing - #def performLogin(self, url, soup): - # params = {} - - # if self.password: - # params['email'] = self.username - # params['pass1'] = self.password - # else: - # params['email'] = self.getConfig("username") - # params['pass1'] = self.getConfig("password") - # params['submit'] = 'Login' - - # # copy all hidden input tags to pick up appropriate tokens. - # for tag in soup.find_all('input',{'type':'hidden'}): - # params[tag['name']] = tag['value'] - - # logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1'])) - - # d = self.post_request(url, params, usecache=False) - # d = self.post_request(url, params, usecache=False) - # soup = self.make_soup(d) - - #if not (soup.find('form', {'name' : 'login'}) == None): - # logger.info("Failed to login to URL %s as %s" % (url, params['email'])) - # raise exceptions.FailedToLogin(url,params['email']) - # return False - #else: - # return True - ## Getting the chapter list and the meta data, plus 'is adult' checking. def doExtractChapterUrlsAndMetadata(self, get_cover=True): ## You need to have your is_adult set to true to get this story if not (self.is_adult or self.getConfig("is_adult")): raise exceptions.AdultCheckRequired(self.url) + else: + d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"}) + if "Age verified successfully" not in d: + raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d)) url = self.url logger.debug("URL: "+url) data = self.get_request(url) + # logger.debug(data) if "The dragons running the back end of the site can not seem to find the story you are looking for." in data: raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain())) soup = self.make_soup(data) - ##This is not working right now, so I'm commenting it out, but leaving it for future testing - #self.performLogin(url, soup) - - ## Title ## Some of the titles have a backslash on the story page, but not on the Author's page ## So I am removing it from the title, so it can be found on the Author's page further in the code. ## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them, ## so I have to here as well. I used multiple replaces to make sure, since I did the same below. - a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',stripHTML(a).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip()) + h1 = soup.find('h1') + # logger.debug("Title:%s"%h1) + self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip()) - # Find the chapters: - chapters = soup.find('ul',{'class':'dropdown-content'}) - for i, chapter in enumerate(chapters.find_all('a')): - self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1)) + # Find the chapters from first list only + chapters = soup.select_one('select.chapter-select').select('option') + for chapter in chapters: + self.add_chapter(chapter,self.url+'&chapter='+chapter['value']) # Find authorid and URL from... author url. - a = soup.find('a', href=re.compile(r"profile.php\?no=\d+")) + a = soup.find('a', href=re.compile(r"profile.php\?id=\d+")) if a == None: # I know that the original author of fanficfare wants to always have metadata, # but I posit that if the story is there, even if we can't get the metadata from the @@ -232,140 +184,56 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter): self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org') self.story.setMetadata('author','Unknown') logger.warning('There was no author found for the story... Metadata will not be retreived.') - self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<') + self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<') else: self.story.setMetadata('authorId',a['href'].split('=')[1]) self.story.setMetadata('authorUrl',a['href']) self.story.setMetadata('author',stripHTML(a)) - ##The story page does not give much Metadata, so we go to the Author's page + ## The story page does not give much Metadata, so we go to + ## the Author's page. Except it's actually a sub-req for + ## list of author's stories for that subdomain + author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format( + self.getBaseDomain(), + self.zone, + self.story.getMetadata('authorId')) - ##Get the first Author page to see if there are multiple pages. - ##AFF doesn't care if the page number is larger than the actual pages, - ##it will continue to show the last page even if the variable is larger than the actual page - author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone) - #author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1' - - ##I'm resetting the author page to the zone for this story - self.story.setMetadata('authorUrl',author_Url) - - logger.debug('Getting the author page: {0}'.format(author_Url)) + logger.debug('Getting the load-user-stories page: {0}'.format(author_Url)) adata = self.get_request(author_Url) - if "The member you are looking for does not exist." in adata: - raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain())) - #raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.") + none_found = "No stories found in this category." + if none_found in adata: + raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found)) asoup = self.make_soup(adata) + # logger.debug(asoup) - ##Getting the number of author pages - pages = 0 - pagination=asoup.find('ul',{'class' : 'pagination'}) - if pagination: - pages = pagination.find_all('li')[-1].find('a') - if not pages == None: - pages = pages['href'].split('=')[-1] - else: - pages = 0 + story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url)) + # logger.debug(story_card) - storya = None - ##If there is only 1 page of stories, check it to get the Metadata, - if pages == 0: - a = asoup.find_all('li') - for lc2 in a: - if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")): - storya = lc2 - break - ## otherwise go through the pages - else: - page=1 - i=0 - while i == 0: - ##We already have the first page, so if this is the first time through, skip getting the page - if page != 1: - author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page)) - logger.debug('Getting the author page: {0}'.format(author_Url)) - adata = self.get_request(author_Url) - ##This will probably never be needed, since AFF doesn't seem to care what number you put as - ## the page number, it will default to the last page, even if you use 1000, for an author - ## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case). - if "The member you are looking for does not exist." in adata: - raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain())) - # we look for the li element that has the story here - asoup = self.make_soup(adata) + ## Category + ## I've only seen one category per story so far, but just in case: + for cat in story_card.select('div.story-card-category'): + # remove Category:, old code suggests Located: is also + # possible, so removing by + cat.find("strong").decompose() + self.story.addToList('category',stripHTML(cat)) - a = asoup.find_all('li') - for lc2 in a: - if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")): - i=1 - storya = lc2 - break - page = page + 1 - if page > int(pages): - break + self.setDescription(url,story_card.select_one('div.story-card-description')) - ##Split the Metadata up into a list - ##We have to change the soup type to a string, then remove the newlines, and double spaces, - ##then changes the
to '-:-', which seperates the different elemeents. - ##Then we strip the HTML elements from the string. - ##There is also a double
, so we have to fix that, then remove the leading and trailing '-:-'. - ##They are always in the same order. - ## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it - liMetadata = unicode(storya).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ') - liMetadata = stripHTML(liMetadata.replace(r'
','-:-').replace('','-:-')) - liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8') - for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')): - if i == 0: - # The value for the title has been manipulated, so may not be the same as gotten at the start. - # I'm going to use the href from the storya retrieved from the author's page to determine if it is correct. - if storya.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url: - raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url)) - elif i == 1: - ##Get the description - self.setDescription(url,stripHTML(value.strip())) - else: - # the rest of the values can be missing, so instead of hardcoding the numbers, we search for them. - if 'Located :' in value: - self.story.setMetadata('category',value.replace(r'>',r'>').replace(r'Located :',r'').strip()) - elif 'Category :' in value: - # Get the Category - self.story.setMetadata('category',value.replace(r'>',r'>').replace(r'Located :',r'').strip()) - elif 'Content Tags :' in value: - # Get the Erotic Tags - value = stripHTML(value.replace(r'Content Tags :',r'')).strip() - for code in re.split(r'\s',value): - self.story.addToList('eroticatags',code) - elif 'Posted :' in value: - # Get the Posted Date - value = value.replace(r'Posted :',r'').strip() - if value.startswith('008'): - # It is unknown how the 200 became 008, but I'm going to change it back here - value = value.replace('008','200') - elif value.startswith('0000'): - # Since the date is showing as 0000, - # I'm going to put the memberdate here - value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip() - self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) - elif 'Edited :' in value: - # Get the 'Updated' Edited date - # AFF has the time for the Updated date, and we only want the date, - # so we take the first 10 characters only - value = value.replace(r'Edited :',r'').strip()[0:10] - if value.startswith('008'): - # It is unknown how the 200 became 008, but I'm going to change it back here - value = value.replace('008','200') - self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) - elif value.startswith('0000') or '-00-' in value: - # Since the date is showing as 0000, - # or there is -00- in the date, - # I'm going to put the Published date here - self.story.setMetadata('dateUpdated', self.story.getMetadataRaw('datePublished')) - else: - self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) - else: - # This catches the blank elements, and the Review and Dragon Prints. - # I am not interested in these, so do nothing - zzzzzzz=0 + for tag in story_card.select('span.story-tag'): + self.story.addToList('eroticatags',stripHTML(tag)) + + ## created/updates share formatting + for meta in story_card.select('div.story-card-meta-item span:last-child'): + meta = stripHTML(meta) + if 'Created: ' in meta: + meta = meta.replace('Created: ','') + self.story.setMetadata('datePublished', makeDate(meta, self.dateformat)) + + if 'Updated: ' in meta: + meta = meta.replace('Updated: ','') + self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat)) # grab the text for an individual chapter. def getChapterText(self, url): @@ -373,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter): logger.debug('Getting chapter text from: %s' % url) soup = self.make_soup(self.get_request(url)) - chaptertag = soup.find('ul',{'class':'pagination'}).parent.parent.parent.findNextSibling('li') + chaptertag = soup.select_one('div.chapter-body') if None == chaptertag: raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url)) - # Change td to a div. - chaptertag.name='div' + ## chapter text includes a copy of story title, author, + ## chapter title, & eroticatags specific to the chapter. Did + ## before, too. return self.utf8FromSoup(url,chaptertag) diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 7d2e6d79..f2684c52 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -1592,18 +1592,13 @@ chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kinti [adult-fanfiction.org] +use_basic_cache:true + extra_valid_entries:eroticatags,disclaimer eroticatags_label:Erotica Tags disclaimer_label:Disclaimer extra_titlepage_entries:eroticatags,disclaimer -## Some sites require login (or login for some rated stories) The -## program can prompt you, or you can save it in config. In -## commandline version, this should go in your personal.ini, not -## defaults.ini. -#username:YourName -#password:yourpassword - [althistory.com] ## Note this is NOT the same as www.alternatehistory.com ## see [base_xenforoforum]