From 7fdc59691f3cc1f97733932c4d9a62d068e3556c Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 29 Sep 2013 14:19:18 -0500 Subject: [PATCH] do_update_hook for fimf, find newest chapter and update from there. --- calibre-plugin/jobs.py | 5 +++- defaults.ini | 7 +++++ downloader.py | 6 +++- .../adapters/adapter_fimfictionnet.py | 29 ++++++++++++++++--- fanficdownloader/adapters/base_adapter.py | 4 +++ plugin-defaults.ini | 7 +++++ 6 files changed, 52 insertions(+), 6 deletions(-) diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index 7959f6d7..a9879ff0 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -198,7 +198,10 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): # dup handling from ffdl_plugin needed for anthology updates. if chaptercount > urlchaptercount: raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png') - + + if adapter.getConfig("do_update_hook"): + chaptercount = adapter.hookForUpdates(chaptercount) + print("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)) print("write to %s"%outfile) diff --git a/defaults.ini b/defaults.ini index 343969fa..437a76f7 100644 --- a/defaults.ini +++ b/defaults.ini @@ -1205,6 +1205,13 @@ groups_label:Groups ## when a password is required rather than prompting every time. #fail_on_password: false +## fimfiction.net stories allow chapters to be added out of order. So +## the newest chapter may not be the last one. FFDL update doesn't +## like that. If do_update_hook is uncommented and set true, the +## adapter will discard all existing chapters from the newest one on +## when updating to enforce accurate chapters. +#do_update_hook:false + [www.harrypotterfanfiction.com] ## Site dedicated to these categories/characters/ships extracategories:Harry Potter diff --git a/downloader.py b/downloader.py index db343f92..b7907772 100644 --- a/downloader.py +++ b/downloader.py @@ -270,7 +270,6 @@ def main(argv, elif chaptercount == 0: print "%s doesn't contain any recognizable chapters, probably from a different source. Not updating." % (output_filename) else: - print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount) if not options.metaonly: # update now handled by pre-populating the old @@ -284,6 +283,11 @@ def main(argv, adapter.calibrebookmark, adapter.logfile) = get_update_data(output_filename) + print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount) + + if adapter.getConfig("do_update_hook"): + chaptercount = adapter.hookForUpdates(chaptercount) + writeStory(configuration,adapter,"epub") else: diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py index 8011a34e..1e72e598 100644 --- a/fanficdownloader/adapters/adapter_fimfictionnet.py +++ b/fanficdownloader/adapters/adapter_fimfictionnet.py @@ -96,8 +96,10 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data: raise exceptions.StoryDoesNotExist(self.url) - if "/images/missing_story.png" in data: - raise exceptions.StoryDoesNotExist(self.url) + # Can cause problems if a missing story is referenced in a comment. + # Shouldn't be needed anyway. + # if "/images/missing_story.png" in data: + # raise exceptions.StoryDoesNotExist(self.url) if "This story has been marked as having adult content." in data: raise exceptions.AdultCheckRequired(self.url) @@ -199,20 +201,31 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): oldestChapter = None newestChapter = None + self.newestChapterNum = None # save for comparing during update. # Scan all chapters to find the oldest and newest, on # FiMFiction it's possible for authors to insert new chapters # out-of-order or change the dates of earlier ones by editing # them--That WILL break epub update. - for chapterDate in soup.findAll('span', {'class':'date'}): + for index, chapterDate in enumerate(soup.findAll('span', {'class':'date'})): date=re.sub(r"(\d+)(st|nd|rd|th)",r"\1",chapterDate.contents[1].strip()) chapterDate = makeDate(date,self.dateformat) if oldestChapter == None or chapterDate < oldestChapter: oldestChapter = chapterDate if newestChapter == None or chapterDate > newestChapter: newestChapter = chapterDate - self.story.setMetadata("datePublished", oldestChapter) + self.newestChapterNum = index + self.story.setMetadata("dateUpdated", newestChapter) + pubdatetag = soup.find('span', {'class':'date_approved'}) + if pubdatetag is None: + self.story.setMetadata("datePublished", oldestChapter) + else: + pubdateraw = pubdatetag('span')[1].text + datestripped=re.sub(r"(\d+)(st|nd|rd|th)",r"\1",pubdateraw.strip()) + pubDate = makeDate(datestripped,self.dateformat) + self.story.setMetadata("datePublished", pubDate) + chars = soup.find("div", {"class":"inner_data"}) # fimfic stopped putting the char name on or around the char # icon now for some reason. Pull it from the image name with @@ -241,8 +254,16 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): for groupName in rawGroupList.findAll('a', {'href':re.compile('^/group/')}): self.story.addToList("groups",stripHTML(groupName)) + def hookForUpdates(self,chaptercount): + if self.oldchapters and len(self.oldchapters) > self.newestChapterNum: + print("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(self.oldchapters), self.newestChapterNum+1)) + self.oldchapters = self.oldchapters[:self.newestChapterNum] + return len(self.oldchapters) + + def getChapterText(self, url): logger.debug('Getting chapter text from: %s' % url) + soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'class' : 'chapter_content'}) if soup == None: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 4ca577dc..5acbe9d9 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -247,6 +247,10 @@ class BaseSiteAdapter(Configurable): self.metadataDone = True return self.story + def hookForUpdates(self,chaptercount): + "Usually not needed." + return chaptercount + ############################### @staticmethod diff --git a/plugin-defaults.ini b/plugin-defaults.ini index b12d0a7f..b0a1ea0e 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -1187,6 +1187,13 @@ groups_label:Groups ## when a password is required rather than prompting every time. #fail_on_password: false +## fimfiction.net stories allow chapters to be added out of order. So +## the newest chapter may not be the last one. FFDL update doesn't +## like that. If do_update_hook is uncommented and set true, the +## adapter will discard all existing chapters from the newest one on +## when updating to enforce accurate chapters. +#do_update_hook:false + [www.harrypotterfanfiction.com] ## Site dedicated to these categories/characters/ships extracategories:Harry Potter