Fix genre parsing for adapter_fanficauthorsnet.

This commit is contained in:
Jim Miller 2018-09-04 12:49:49 -05:00
parent 30006698ad
commit 552cdcff1d

View file

@@ -66,10 +66,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%d %b %y"
################################################################################################
def getBaseDomain(self):
''' Added because fanficauthors.net does send you to www.fanficauthors.net when
''' Added because fanficauthors.net does send you to www.fanficauthors.net when
you go to it '''
return 'fanficauthors.net'
@@ -154,7 +154,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
else:
params['username'] = self.getConfig("username")
params['password'] = self.getConfig("password")
if not params['username']:
raise exceptions.FailedToLogin('You need to have your username and password set.',params['username'])
@@ -175,12 +175,12 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
raise exceptions.StoryDoesNotExist(
"{0}.{1} says: The requested file has not been found".format(
self.zone, self.getBaseDomain()))
# use BeautifulSoup HTML parser to make everything easier to find.
soup = self.make_soup(data)
# Find authorid and URL.
# There is no place where the author's name is listed,
# Find authorid and URL.
# There is no place where the author's name is listed,
# except for in the image at the top of the page. We have to
# work with the url entered to get the Author's Name
a = self.zone.split('.')[0]
@@ -188,7 +188,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
a = a.replace('-',' ').title()
self.story.setMetadata('author',a)
self.story.setMetadata('authorUrl','https://{0}/'.format(self.parsedUrl.netloc))
loginUrl = self.story.getMetadata('authorUrl')+'account/'
loginsoup = self.make_soup(self._fetchUrl(loginUrl))
if True:
@@ -213,21 +213,21 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
for i, chapter in enumerate(chapters):
if '/reviews/' not in chapter['href']:
# here we get the update date. We will update this for every chapter,
# here we get the update date. We will update this for every chapter,
# so we get the last one.
updatedate = stripHTML(unicode(chapters[i].parent)).split(
'Uploaded on:')[1].strip()
updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
'rd ',' ').replace('th ',' ')
self.story.setMetadata('dateUpdated', makeDate(updatedate, self.dateformat))
if '::' in stripHTML(unicode(chapter)):
chapter_title = stripHTML(unicode(chapter).split('::')[1])
else:
chapter_title = stripHTML(unicode(chapter))
chapter_Url = self.story.getMetadata('authorUrl')+chapter['href'][1:]
self.add_chapter(chapter_title, chapter_Url)
genres = ("Drama","Romance")
gotgenre = False
## Getting the Metadata that is there
@@ -245,7 +245,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
elif heading == 'Word count':
self.story.setMetadata('numWords',text)
elif heading == 'Genre':
self.story.setMetadata('genre',text.replace(',',', ').replace(' ',' '))
self.story.extendList('genre',text.split(';'))
gotgenre = True
else:
if gotgenre == True:
@@ -259,11 +259,11 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
for gen in genres:
if metad == gen:
self.story.addToList('genre',metad.strip())
summary = div.find('blockquote').get_text()
self.setDescription(url,summary)
# grab the text for an individual chapter.
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
@@ -271,7 +271,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
soup = self.make_soup(self._fetchUrl(url))
story = soup.find('div',{'class':'story'})
if story == None:
raise exceptions.FailedToDownload(
"Error downloading Chapter: '{0}'! Missing required element!".format(url))
@@ -280,5 +280,5 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
for tag in story.findAll('ul',{'class':'pager'}) + story.findAll(
'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}):
tag.extract()
return self.utf8FromSoup(url,story)