mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-08 12:36:11 +02:00
adapter_adultfanfictionorg: Fixes for site changes #1305
This commit is contained in:
parent
1b65a30798
commit
9b96c151a5
3 changed files with 56 additions and 199 deletions
|
|
@ -1604,13 +1604,6 @@ eroticatags_label:Erotica Tags
|
|||
disclaimer_label:Disclaimer
|
||||
extra_titlepage_entries:eroticatags,disclaimer
|
||||
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
## defaults.ini.
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[althistory.com]
|
||||
## Note this is NOT the same as www.alternatehistory.com
|
||||
## see [base_xenforoforum]
|
||||
|
|
|
|||
|
|
@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%Y-%m-%d"
|
||||
|
||||
|
||||
self.dateformat = "%B %d, %Y"
|
||||
|
||||
## Added because adult-fanfiction.org does send you to
|
||||
## www.adult-fanfiction.org when you go to it and it also moves
|
||||
|
|
@ -139,91 +137,45 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'
|
||||
|
||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
#def needToLoginCheck(self, data):
|
||||
##This adapter will always require a login
|
||||
# return True
|
||||
|
||||
# <form name="login" method="post" action="">
|
||||
# <div class="top">E-mail: <span id="sprytextfield1">
|
||||
# <input name="email" type="text" id="email" size="20" maxlength="255" />
|
||||
# <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
|
||||
# <div class="top">Password: <span id="sprytextfield2">
|
||||
# <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
|
||||
# <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
|
||||
# <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
|
||||
# <input type="submit" value="Login" />
|
||||
# </div>
|
||||
# </form>
|
||||
|
||||
|
||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
||||
#def performLogin(self, url, soup):
|
||||
# params = {}
|
||||
|
||||
# if self.password:
|
||||
# params['email'] = self.username
|
||||
# params['pass1'] = self.password
|
||||
# else:
|
||||
# params['email'] = self.getConfig("username")
|
||||
# params['pass1'] = self.getConfig("password")
|
||||
# params['submit'] = 'Login'
|
||||
|
||||
# # copy all hidden input tags to pick up appropriate tokens.
|
||||
# for tag in soup.find_all('input',{'type':'hidden'}):
|
||||
# params[tag['name']] = tag['value']
|
||||
|
||||
# logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
|
||||
|
||||
# d = self.post_request(url, params, usecache=False)
|
||||
# d = self.post_request(url, params, usecache=False)
|
||||
# soup = self.make_soup(d)
|
||||
|
||||
#if not (soup.find('form', {'name' : 'login'}) == None):
|
||||
# logger.info("Failed to login to URL %s as %s" % (url, params['email']))
|
||||
# raise exceptions.FailedToLogin(url,params['email'])
|
||||
# return False
|
||||
#else:
|
||||
# return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
|
||||
## You need to have your is_adult set to true to get this story
|
||||
if not (self.is_adult or self.getConfig("is_adult")):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
else:
|
||||
d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
|
||||
if "Age verified successfully" not in d:
|
||||
raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))
|
||||
|
||||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
data = self.get_request(url)
|
||||
# logger.debug(data)
|
||||
|
||||
if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
|
||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
||||
#self.performLogin(url, soup)
|
||||
|
||||
|
||||
## Title
|
||||
## Some of the titles have a backslash on the story page, but not on the Author's page
|
||||
## So I am removing it from the title, so it can be found on the Author's page further in the code.
|
||||
## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them,
|
||||
## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
|
||||
a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
|
||||
h1 = soup.find('h1')
|
||||
# logger.debug("Title:%s"%h1)
|
||||
self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
|
||||
|
||||
# Find the chapters:
|
||||
chapters = soup.find('ul',{'class':'dropdown-content'})
|
||||
for i, chapter in enumerate(chapters.find_all('a')):
|
||||
self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
|
||||
# Find the chapters from first list only
|
||||
chapters = soup.select_one('select.chapter-select').select('option')
|
||||
for chapter in chapters:
|
||||
self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])
|
||||
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
|
||||
a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
|
||||
if a == None:
|
||||
# I know that the original author of fanficfare wants to always have metadata,
|
||||
# but I posit that if the story is there, even if we can't get the metadata from the
|
||||
|
|
@ -232,140 +184,56 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
|
||||
self.story.setMetadata('author','Unknown')
|
||||
logger.warning('There was no author found for the story... Metadata will not be retreived.')
|
||||
self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
|
||||
self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
|
||||
else:
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl',a['href'])
|
||||
self.story.setMetadata('author',stripHTML(a))
|
||||
|
||||
##The story page does not give much Metadata, so we go to the Author's page
|
||||
## The story page does not give much Metadata, so we go to
|
||||
## the Author's page. Except it's actually a sub-req for
|
||||
## list of author's stories for that subdomain
|
||||
author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
|
||||
self.getBaseDomain(),
|
||||
self.zone,
|
||||
self.story.getMetadata('authorId'))
|
||||
|
||||
##Get the first Author page to see if there are multiple pages.
|
||||
##AFF doesn't care if the page number is larger than the actual pages,
|
||||
##it will continue to show the last page even if the variable is larger than the actual page
|
||||
author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
|
||||
#author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'
|
||||
|
||||
##I'm resetting the author page to the zone for this story
|
||||
self.story.setMetadata('authorUrl',author_Url)
|
||||
|
||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
||||
logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
|
||||
adata = self.get_request(author_Url)
|
||||
|
||||
if "The member you are looking for does not exist." in adata:
|
||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
|
||||
#raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")
|
||||
none_found = "No stories found in this category."
|
||||
if none_found in adata:
|
||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))
|
||||
|
||||
asoup = self.make_soup(adata)
|
||||
# logger.debug(asoup)
|
||||
|
||||
##Getting the number of author pages
|
||||
pages = 0
|
||||
pagination=asoup.find('ul',{'class' : 'pagination'})
|
||||
if pagination:
|
||||
pages = pagination.find_all('li')[-1].find('a')
|
||||
if not pages == None:
|
||||
pages = pages['href'].split('=')[-1]
|
||||
else:
|
||||
pages = 0
|
||||
story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
|
||||
# logger.debug(story_card)
|
||||
|
||||
storya = None
|
||||
##If there is only 1 page of stories, check it to get the Metadata,
|
||||
if pages == 0:
|
||||
a = asoup.find_all('li')
|
||||
for lc2 in a:
|
||||
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
|
||||
storya = lc2
|
||||
break
|
||||
## otherwise go through the pages
|
||||
else:
|
||||
page=1
|
||||
i=0
|
||||
while i == 0:
|
||||
##We already have the first page, so if this is the first time through, skip getting the page
|
||||
if page != 1:
|
||||
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
|
||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
||||
adata = self.get_request(author_Url)
|
||||
##This will probably never be needed, since AFF doesn't seem to care what number you put as
|
||||
## the page number, it will default to the last page, even if you use 1000, for an author
|
||||
## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
|
||||
if "The member you are looking for does not exist." in adata:
|
||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
|
||||
# we look for the li element that has the story here
|
||||
asoup = self.make_soup(adata)
|
||||
## Category
|
||||
## I've only seen one category per story so far, but just in case:
|
||||
for cat in story_card.select('div.story-card-category'):
|
||||
# remove Category:, old code suggests Located: is also
|
||||
# possible, so removing by <strong>
|
||||
cat.find("strong").decompose()
|
||||
self.story.addToList('category',stripHTML(cat))
|
||||
|
||||
a = asoup.find_all('li')
|
||||
for lc2 in a:
|
||||
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
|
||||
i=1
|
||||
storya = lc2
|
||||
break
|
||||
page = page + 1
|
||||
if page > int(pages):
|
||||
break
|
||||
self.setDescription(url,story_card.select_one('div.story-card-description'))
|
||||
|
||||
##Split the Metadata up into a list
|
||||
##We have to change the soup type to a string, then remove the newlines, and double spaces,
|
||||
##then changes the <br/> to '-:-', which seperates the different elemeents.
|
||||
##Then we strip the HTML elements from the string.
|
||||
##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
|
||||
##They are always in the same order.
|
||||
## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
|
||||
liMetadata = unicode(storya).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ')
|
||||
liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
|
||||
liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
|
||||
for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
|
||||
if i == 0:
|
||||
# The value for the title has been manipulated, so may not be the same as gotten at the start.
|
||||
# I'm going to use the href from the storya retrieved from the author's page to determine if it is correct.
|
||||
if storya.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
|
||||
raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
|
||||
elif i == 1:
|
||||
##Get the description
|
||||
self.setDescription(url,stripHTML(value.strip()))
|
||||
else:
|
||||
# the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
|
||||
if 'Located :' in value:
|
||||
self.story.setMetadata('category',value.replace(r'>',r'>').replace(r'Located :',r'').strip())
|
||||
elif 'Category :' in value:
|
||||
# Get the Category
|
||||
self.story.setMetadata('category',value.replace(r'>',r'>').replace(r'Located :',r'').strip())
|
||||
elif 'Content Tags :' in value:
|
||||
# Get the Erotic Tags
|
||||
value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
|
||||
for code in re.split(r'\s',value):
|
||||
self.story.addToList('eroticatags',code)
|
||||
elif 'Posted :' in value:
|
||||
# Get the Posted Date
|
||||
value = value.replace(r'Posted :',r'').strip()
|
||||
if value.startswith('008'):
|
||||
# It is unknown how the 200 became 008, but I'm going to change it back here
|
||||
value = value.replace('008','200')
|
||||
elif value.startswith('0000'):
|
||||
# Since the date is showing as 0000,
|
||||
# I'm going to put the memberdate here
|
||||
value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
elif 'Edited :' in value:
|
||||
# Get the 'Updated' Edited date
|
||||
# AFF has the time for the Updated date, and we only want the date,
|
||||
# so we take the first 10 characters only
|
||||
value = value.replace(r'Edited :',r'').strip()[0:10]
|
||||
if value.startswith('008'):
|
||||
# It is unknown how the 200 became 008, but I'm going to change it back here
|
||||
value = value.replace('008','200')
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
elif value.startswith('0000') or '-00-' in value:
|
||||
# Since the date is showing as 0000,
|
||||
# or there is -00- in the date,
|
||||
# I'm going to put the Published date here
|
||||
self.story.setMetadata('dateUpdated', self.story.getMetadataRaw('datePublished'))
|
||||
else:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
else:
|
||||
# This catches the blank elements, and the Review and Dragon Prints.
|
||||
# I am not interested in these, so do nothing
|
||||
zzzzzzz=0
|
||||
for tag in story_card.select('span.story-tag'):
|
||||
self.story.addToList('eroticatags',stripHTML(tag))
|
||||
|
||||
## created/updates share formatting
|
||||
for meta in story_card.select('div.story-card-meta-item span:last-child'):
|
||||
meta = stripHTML(meta)
|
||||
if 'Created: ' in meta:
|
||||
meta = meta.replace('Created: ','')
|
||||
self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))
|
||||
|
||||
if 'Updated: ' in meta:
|
||||
meta = meta.replace('Updated: ','')
|
||||
self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
|
@ -373,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
chaptertag = soup.find('ul',{'class':'pagination'}).parent.parent.parent.findNextSibling('li')
|
||||
chaptertag = soup.select_one('div.chapter-body')
|
||||
if None == chaptertag:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
||||
# Change td to a div.
|
||||
chaptertag.name='div'
|
||||
## chapter text includes a copy of story title, author,
|
||||
## chapter title, & eroticatags specific to the chapter. Did
|
||||
## before, too.
|
||||
|
||||
return self.utf8FromSoup(url,chaptertag)
|
||||
|
|
|
|||
|
|
@ -1592,18 +1592,13 @@ chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kinti
|
|||
|
||||
|
||||
[adult-fanfiction.org]
|
||||
use_basic_cache:true
|
||||
|
||||
extra_valid_entries:eroticatags,disclaimer
|
||||
eroticatags_label:Erotica Tags
|
||||
disclaimer_label:Disclaimer
|
||||
extra_titlepage_entries:eroticatags,disclaimer
|
||||
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
## defaults.ini.
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[althistory.com]
|
||||
## Note this is NOT the same as www.alternatehistory.com
|
||||
## see [base_xenforoforum]
|
||||
|
|
|
|||
Loading…
Reference in a new issue