adapter_adultfanfictionorg: Fixes for site changes #1305

This commit is contained in:
Jim Miller 2026-02-01 12:57:40 -06:00
parent 1b65a30798
commit 9b96c151a5
3 changed files with 56 additions and 199 deletions

View file

@ -1604,13 +1604,6 @@ eroticatags_label:Erotica Tags
disclaimer_label:Disclaimer
extra_titlepage_entries:eroticatags,disclaimer
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#username:YourName
#password:yourpassword
[althistory.com]
## Note this is NOT the same as www.alternatehistory.com
## see [base_xenforoforum]

View file

@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%Y-%m-%d"
self.dateformat = "%B %d, %Y"
## Added because adult-fanfiction.org does send you to
## www.adult-fanfiction.org when you go to it and it also moves
@ -139,91 +137,45 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'
##This is not working right now, so I'm commenting it out, but leaving it for future testing
## Login seems to be reasonably standard across eFiction sites.
#def needToLoginCheck(self, data):
##This adapter will always require a login
# return True
# <form name="login" method="post" action="">
# <div class="top">E-mail: <span id="sprytextfield1">
# <input name="email" type="text" id="email" size="20" maxlength="255" />
# <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
# <div class="top">Password: <span id="sprytextfield2">
# <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
# <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
# <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
# <input type="submit" value="Login" />
# </div>
# </form>
##This is not working right now, so I'm commenting it out, but leaving it for future testing
#def performLogin(self, url, soup):
# params = {}
# if self.password:
# params['email'] = self.username
# params['pass1'] = self.password
# else:
# params['email'] = self.getConfig("username")
# params['pass1'] = self.getConfig("password")
# params['submit'] = 'Login'
# # copy all hidden input tags to pick up appropriate tokens.
# for tag in soup.find_all('input',{'type':'hidden'}):
# params[tag['name']] = tag['value']
# logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
# d = self.post_request(url, params, usecache=False)
# d = self.post_request(url, params, usecache=False)
# soup = self.make_soup(d)
#if not (soup.find('form', {'name' : 'login'}) == None):
# logger.info("Failed to login to URL %s as %s" % (url, params['email']))
# raise exceptions.FailedToLogin(url,params['email'])
# return False
#else:
# return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
## You need to have your is_adult set to true to get this story
if not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url)
else:
d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
if "Age verified successfully" not in d:
raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))
url = self.url
logger.debug("URL: "+url)
data = self.get_request(url)
# logger.debug(data)
if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))
soup = self.make_soup(data)
##This is not working right now, so I'm commenting it out, but leaving it for future testing
#self.performLogin(url, soup)
## Title
## Some of the titles have a backslash on the story page, but not on the Author's page
## So I am removing it from the title, so it can be found on the Author's page further in the code.
## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them,
## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',stripHTML(a).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
h1 = soup.find('h1')
# logger.debug("Title:%s"%h1)
self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
# Find the chapters:
chapters = soup.find('ul',{'class':'dropdown-content'})
for i, chapter in enumerate(chapters.find_all('a')):
self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
# Find the chapters from first list only
chapters = soup.select_one('select.chapter-select').select('option')
for chapter in chapters:
self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
if a == None:
# I know that the original author of fanficfare wants to always have metadata,
# but I posit that if the story is there, even if we can't get the metadata from the
@ -232,140 +184,56 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
self.story.setMetadata('author','Unknown')
logger.warning('There was no author found for the story... Metadata will not be retreived.')
self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
else:
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl',a['href'])
self.story.setMetadata('author',stripHTML(a))
##The story page does not give much Metadata, so we go to the Author's page
## The story page does not give much Metadata, so we go to
## the Author's page. Except it's actually a sub-req for
## list of author's stories for that subdomain
author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
self.getBaseDomain(),
self.zone,
self.story.getMetadata('authorId'))
##Get the first Author page to see if there are multiple pages.
##AFF doesn't care if the page number is larger than the actual pages,
##it will continue to show the last page even if the variable is larger than the actual page
author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
#author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'
##I'm resetting the author page to the zone for this story
self.story.setMetadata('authorUrl',author_Url)
logger.debug('Getting the author page: {0}'.format(author_Url))
logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
adata = self.get_request(author_Url)
if "The member you are looking for does not exist." in adata:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
#raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")
none_found = "No stories found in this category."
if none_found in adata:
raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))
asoup = self.make_soup(adata)
# logger.debug(asoup)
##Getting the number of author pages
pages = 0
pagination=asoup.find('ul',{'class' : 'pagination'})
if pagination:
pages = pagination.find_all('li')[-1].find('a')
if not pages == None:
pages = pages['href'].split('=')[-1]
else:
pages = 0
story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
# logger.debug(story_card)
storya = None
##If there is only 1 page of stories, check it to get the Metadata,
if pages == 0:
a = asoup.find_all('li')
for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
storya = lc2
break
## otherwise go through the pages
else:
page=1
i=0
while i == 0:
##We already have the first page, so if this is the first time through, skip getting the page
if page != 1:
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
logger.debug('Getting the author page: {0}'.format(author_Url))
adata = self.get_request(author_Url)
##This will probably never be needed, since AFF doesn't seem to care what number you put as
## the page number, it will default to the last page, even if you use 1000, for an author
## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
if "The member you are looking for does not exist." in adata:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
# we look for the li element that has the story here
asoup = self.make_soup(adata)
## Category
## I've only seen one category per story so far, but just in case:
for cat in story_card.select('div.story-card-category'):
# remove Category:, old code suggests Located: is also
# possible, so removing by <strong>
cat.find("strong").decompose()
self.story.addToList('category',stripHTML(cat))
a = asoup.find_all('li')
for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
i=1
storya = lc2
break
page = page + 1
if page > int(pages):
break
self.setDescription(url,story_card.select_one('div.story-card-description'))
##Split the Metadata up into a list
##We have to change the soup type to a string, then remove the newlines, and double spaces,
##then changes the <br/> to '-:-', which seperates the different elemeents.
##Then we strip the HTML elements from the string.
##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
##They are always in the same order.
## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
liMetadata = unicode(storya).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ')
liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
if i == 0:
# The value for the title has been manipulated, so may not be the same as gotten at the start.
# I'm going to use the href from the storya retrieved from the author's page to determine if it is correct.
if storya.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
elif i == 1:
##Get the description
self.setDescription(url,stripHTML(value.strip()))
else:
# the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
if 'Located :' in value:
self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
elif 'Category :' in value:
# Get the Category
self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
elif 'Content Tags :' in value:
# Get the Erotic Tags
value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
for code in re.split(r'\s',value):
self.story.addToList('eroticatags',code)
elif 'Posted :' in value:
# Get the Posted Date
value = value.replace(r'Posted :',r'').strip()
if value.startswith('008'):
# It is unknown how the 200 became 008, but I'm going to change it back here
value = value.replace('008','200')
elif value.startswith('0000'):
# Since the date is showing as 0000,
# I'm going to put the memberdate here
value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
elif 'Edited :' in value:
# Get the 'Updated' Edited date
# AFF has the time for the Updated date, and we only want the date,
# so we take the first 10 characters only
value = value.replace(r'Edited :',r'').strip()[0:10]
if value.startswith('008'):
# It is unknown how the 200 became 008, but I'm going to change it back here
value = value.replace('008','200')
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
elif value.startswith('0000') or '-00-' in value:
# Since the date is showing as 0000,
# or there is -00- in the date,
# I'm going to put the Published date here
self.story.setMetadata('dateUpdated', self.story.getMetadataRaw('datePublished'))
else:
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
else:
# This catches the blank elements, and the Review and Dragon Prints.
# I am not interested in these, so do nothing
zzzzzzz=0
for tag in story_card.select('span.story-tag'):
self.story.addToList('eroticatags',stripHTML(tag))
## created/updates share formatting
for meta in story_card.select('div.story-card-meta-item span:last-child'):
meta = stripHTML(meta)
if 'Created: ' in meta:
meta = meta.replace('Created: ','')
self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))
if 'Updated: ' in meta:
meta = meta.replace('Updated: ','')
self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))
# grab the text for an individual chapter.
def getChapterText(self, url):
@ -373,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self.get_request(url))
chaptertag = soup.find('ul',{'class':'pagination'}).parent.parent.parent.findNextSibling('li')
chaptertag = soup.select_one('div.chapter-body')
if None == chaptertag:
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
# Change td to a div.
chaptertag.name='div'
## chapter text includes a copy of story title, author,
## chapter title, & eroticatags specific to the chapter. Did
## before, too.
return self.utf8FromSoup(url,chaptertag)

View file

@ -1592,18 +1592,13 @@ chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kinti
[adult-fanfiction.org]
use_basic_cache:true
extra_valid_entries:eroticatags,disclaimer
eroticatags_label:Erotica Tags
disclaimer_label:Disclaimer
extra_titlepage_entries:eroticatags,disclaimer
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#username:YourName
#password:yourpassword
[althistory.com]
## Note this is NOT the same as www.alternatehistory.com
## see [base_xenforoforum]