Fixes for fanfiktion.de & thehexfiles.net, add entries to teststory valid list.

This commit is contained in:
Jim Miller 2013-07-23 15:17:16 -05:00
parent ab0b25317b
commit 83d210566b
4 changed files with 13 additions and 11 deletions

View file

@@ -1476,7 +1476,8 @@ extracategories:Stargate: Atlantis
[teststory:defaults]
valid_entries:title,author_list,authorId_list,authorUrl_list,storyUrl,
datePublished,dateUpdated,numWords,status,language,series,seriesUrl,
rating,category_list,genre_list,warnings_list,characters_list
rating,category_list,genre_list,warnings_list,characters_list,ships_list,
description,site,extratags
# {{storyId}} is a special case--it's the only one that works.
title:Test Story Title {{storyId}}

View file

@@ -69,10 +69,10 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
@classmethod
def getSiteExampleURLs(self):
    """Return space-separated example story URLs for this site.

    Three forms are listed, all built from ``self.getSiteDomain()``:
    the bare story URL, the story URL followed by a chapter number, and
    the story URL followed by a chapter number and a story-name slug.
    """
    # Single return statement: the bare-URL form is the first entry of
    # the list, so no separate bare-URL-only return is needed.
    story = "http://" + self.getSiteDomain() + "/s/46ccbef30000616306614050"
    return story + " " + story + "/1 " + story + "/1/story-name"
def getSiteURLPattern(self):
    """Return the regex source that recognises story URLs on this site.

    The pattern is the escaped ``http://<domain>/s/`` prefix, a story id
    made of word characters, and an optional ``/<chapter number>``.
    """
    prefix = re.escape("http://" + self.getSiteDomain() + "/s/")
    # Deliberately not anchored with "$": URLs that carry extra path
    # segments after the chapter number (e.g. ".../1/story-name") must
    # still match.  NOTE(review): this relies on the caller doing a
    # prefix match (re.match-style) -- confirm against the caller.
    return prefix + r"\w+(/\d+)?"
## Login seems to be reasonably standard across eFiction sites.
def needToLoginCheck(self, data):
@@ -138,15 +138,15 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
# Now go hunting for all the meta data and the chapter list.
## Title
a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+"/"))
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
head = soup.find('div', {'class' : 'story-metadata-left-top'})
a = head.find('a')
self.story.setMetadata('authorId',a['href'].split('/')[2])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)
self.story.setMetadata('author',stripHTML(a))
# Find the chapters:
for chapter in soup.find('select').findAll('option'):
@@ -179,8 +179,8 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
td = tr[i].findAll('td')
self.story.addToList('category',stripHTML(td[1]))
self.story.setMetadata('rating', td[4].string)
self.story.setMetadata('numWords', td[5].string)
self.story.setMetadata('rating', stripHTML(td[4]))
self.story.setMetadata('numWords', stripHTML(td[5]))
# grab the text for an individual chapter.

View file

@@ -103,7 +103,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)
self.story.setMetadata('author',stripHTML(a))
asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl')))
try:
@@ -118,7 +118,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
for info in asoup.findAll('table', {'cellspacing' : '4'}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None:
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
break

View file

@@ -1458,7 +1458,8 @@ extracategories:Stargate: Atlantis
[teststory:defaults]
valid_entries:title,author_list,authorId_list,authorUrl_list,storyUrl,
datePublished,dateUpdated,numWords,status,language,series,seriesUrl,
rating,category_list,genre_list,warnings_list,characters_list
rating,category_list,genre_list,warnings_list,characters_list,ships_list,
description,site,extratags
# {{storyId}} is a special case--it's the only one that works.
title:Test Story Title {{storyId}}