do_update_hook for fimf, find newest chapter and update from there.

This commit is contained in:
Jim Miller 2013-09-29 14:19:18 -05:00
parent 78845d0d1e
commit 7fdc59691f
6 changed files with 52 additions and 6 deletions

View file

@ -198,7 +198,10 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
# dup handling from ffdl_plugin needed for anthology updates.
if chaptercount > urlchaptercount:
raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png')
if adapter.getConfig("do_update_hook"):
chaptercount = adapter.hookForUpdates(chaptercount)
print("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount))
print("write to %s"%outfile)

View file

@ -1205,6 +1205,13 @@ groups_label:Groups
## when a password is required rather than prompting every time.
#fail_on_password: false
## fimfiction.net lets authors add chapters out of order, so the
## newest chapter may not be the last one, and a normal FFDL update
## does not handle that. If do_update_hook is uncommented and set to
## true, then on update the adapter discards the existing chapters
## from the newest chapter onward and updates from there, so the
## chapters stay accurate.
#do_update_hook:false
[www.harrypotterfanfiction.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter

View file

@ -270,7 +270,6 @@ def main(argv,
elif chaptercount == 0:
print "%s doesn't contain any recognizable chapters, probably from a different source. Not updating." % (output_filename)
else:
print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)
if not options.metaonly:
# update now handled by pre-populating the old
@ -284,6 +283,11 @@ def main(argv,
adapter.calibrebookmark,
adapter.logfile) = get_update_data(output_filename)
print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)
if adapter.getConfig("do_update_hook"):
chaptercount = adapter.hookForUpdates(chaptercount)
writeStory(configuration,adapter,"epub")
else:

View file

@ -96,8 +96,10 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data:
raise exceptions.StoryDoesNotExist(self.url)
if "/images/missing_story.png" in data:
raise exceptions.StoryDoesNotExist(self.url)
# Can cause problems if a missing story is referenced in a comment.
# Shouldn't be needed anyway.
# if "/images/missing_story.png" in data:
# raise exceptions.StoryDoesNotExist(self.url)
if "This story has been marked as having adult content." in data:
raise exceptions.AdultCheckRequired(self.url)
@ -199,20 +201,31 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
oldestChapter = None
newestChapter = None
self.newestChapterNum = None # save for comparing during update.
# Scan all chapters to find the oldest and newest, on
# FiMFiction it's possible for authors to insert new chapters
# out-of-order or change the dates of earlier ones by editing
# them--That WILL break epub update.
for chapterDate in soup.findAll('span', {'class':'date'}):
for index, chapterDate in enumerate(soup.findAll('span', {'class':'date'})):
date=re.sub(r"(\d+)(st|nd|rd|th)",r"\1",chapterDate.contents[1].strip())
chapterDate = makeDate(date,self.dateformat)
if oldestChapter == None or chapterDate < oldestChapter:
oldestChapter = chapterDate
if newestChapter == None or chapterDate > newestChapter:
newestChapter = chapterDate
self.story.setMetadata("datePublished", oldestChapter)
self.newestChapterNum = index
self.story.setMetadata("dateUpdated", newestChapter)
pubdatetag = soup.find('span', {'class':'date_approved'})
if pubdatetag is None:
self.story.setMetadata("datePublished", oldestChapter)
else:
pubdateraw = pubdatetag('span')[1].text
datestripped=re.sub(r"(\d+)(st|nd|rd|th)",r"\1",pubdateraw.strip())
pubDate = makeDate(datestripped,self.dateformat)
self.story.setMetadata("datePublished", pubDate)
chars = soup.find("div", {"class":"inner_data"})
# fimfic stopped putting the char name on or around the char
# icon now for some reason. Pull it from the image name with
@ -241,8 +254,16 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
for groupName in rawGroupList.findAll('a', {'href':re.compile('^/group/')}):
self.story.addToList("groups",stripHTML(groupName))
def hookForUpdates(self,chaptercount):
    """Trim the chapters kept from an existing epub so the update restarts at the newest chapter.

    Reads self.oldchapters (the chapters carried over from the existing
    epub) and self.newestChapterNum (set while scanning chapter dates;
    appears to be the 0-based index of the most recently dated chapter).
    When the existing epub holds more chapters than that index, every
    old chapter from that index onward is dropped.

    chaptercount -- the caller's current count of existing chapters.

    Returns the number of old chapters still kept, or chaptercount
    unchanged when there are no old chapters to work with.
    """
    oldchapters = getattr(self, 'oldchapters', None)
    if oldchapters is None:
        # No existing chapters were loaded: len(None) would raise
        # TypeError, so leave the caller's count as it was.
        return chaptercount
    newest = getattr(self, 'newestChapterNum', None)
    # newest stays None when no chapter dates were found; comparing or
    # adding to None would raise, so only trim with a real index.
    if newest is not None and len(oldchapters) > newest:
        print("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(oldchapters), newest+1))
        self.oldchapters = oldchapters[:newest]
    return len(self.oldchapters)
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'class' : 'chapter_content'})
if soup == None:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

View file

@ -247,6 +247,10 @@ class BaseSiteAdapter(Configurable):
self.metadataDone = True
return self.story
def hookForUpdates(self, chaptercount):
    """Default update hook: hand back chaptercount untouched.

    Adapters that need to adjust the existing chapter count before an
    update can override this; most sites do not need to.
    """
    return chaptercount
###############################
@staticmethod

View file

@ -1187,6 +1187,13 @@ groups_label:Groups
## when a password is required rather than prompting every time.
#fail_on_password: false
## fimfiction.net lets authors add chapters out of order, so the
## newest chapter may not be the last one, and a normal FFDL update
## does not handle that. If do_update_hook is uncommented and set to
## true, then on update the adapter discards the existing chapters
## from the newest chapter onward and updates from there, so the
## chapters stay accurate.
#do_update_hook:false
[www.harrypotterfanfiction.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter