diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index a1a8dffb..15348058 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 2, 0) + version = (1, 2, 1) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index d677ea11..d9f606b5 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -372,6 +372,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): url = book['url'] print("url:%s"%url) + skip_date_update = False ## was self.ffdlconfig, but we need to be able to change it ## when doing epub update. @@ -409,8 +410,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction): book['author_sort'] = book['author'] = story.getMetadata("author", removeallentities=True) book['publisher'] = story.getMetadata("site") book['tags'] = writer.getTags() - book['pubdate'] = story.getMetadataRaw('datePublished') - book['timestamp'] = story.getMetadataRaw('dateCreated') book['comments'] = story.getMetadata("description") #, removeallentities=True) comments handles entities better. # adapter.opener is the element with a threadlock. But del @@ -478,7 +477,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): ## newer/chaptercount checks are the same for both: # Update epub, but only if more chapters. - if collision == UPDATE: + if collision in (UPDATE,UPDATEALWAYS): # collision == UPDATE # 'book' can exist without epub. If there's no existing epub, # let it go and it will download it. if db.has_format(book_id,fileform,index_is_id=True): @@ -489,12 +488,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction): titlenavpoints=False, striptitletoc=True, forceunique=False) - urlchaptercount = int(story.getMetadata('numChapters')) - if chaptercount == urlchaptercount: # and not onlyoverwriteifnewer: - raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png') + if chaptercount == urlchaptercount: + if collision == UPDATE: + raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png') + else: + # UPDATEALWAYS + skip_date_update = True elif chaptercount > urlchaptercount: - raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d." % (chaptercount,urlchaptercount),'dialog_error.png') + raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png') if collision == OVERWRITE and \ db.has_format(book_id,formmapping[fileform],index_is_id=True): @@ -515,6 +517,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction): print("existing epub tmp:"+tmp.name) book['epub_for_update'] = tmp.name + book['pubdate'] = story.getMetadataRaw('datePublished') + + if collision != CALIBREONLY and not skip_date_update: + # I'm half convinced this should be dateUpdated instead, but + # this behavior matches how epubs come out when imported + # dateCreated == packaged--epub/etc created. + book['timestamp'] = story.getMetadataRaw('dateCreated') + else: + book['timestamp'] = None + if book['good']: # there shouldn't be any !'good' books at this point. # if still 'good', make a temp file to write the output to. tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'], @@ -700,7 +712,10 @@ class FanFictionDownLoaderPlugin(InterfaceAction): old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags) # mi.tags needs to be list, but set kills dups. mi.tags = list(set(list(old_tags)+mi.tags)) - + # Set language english, but only if not already set. + oldmi = db.get_metadata(book_id,index_is_id=True) + if not oldmi.languages: + mi.languages=['eng'] db.set_metadata(book_id,mi) def _get_clean_reading_lists(self,lists): @@ -772,7 +787,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): mi.set_identifiers({'url':book['url']}) mi.publisher = book['publisher'] mi.tags = book['tags'] - #mi.languages = ['en'] + #mi.languages = ['en'] # handled in _update_metadata so it can check for existing lang. mi.pubdate = book['pubdate'] mi.timestamp = book['timestamp'] mi.comments = book['comments'] @@ -861,7 +876,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction): if db.has_format(book_id,'EPUB',index_is_id=True): existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True) mi = get_metadata(existingepub,'EPUB') - #print("mi:%s"%mi) identifiers = mi.get_identifiers() if 'url' in identifiers: #print("url from epub:"+identifiers['url'].replace('|',':')) diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index b1a6dd7e..33372c50 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -44,8 +44,10 @@ def do_download_worker(book_list, options, print(options['version']) total = 0 # Queue all the jobs + print("Adding jobs for URLs:") for book in book_list: if book['good']: + print("%s"%book['url']) total += 1 args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_for_worker', diff --git a/fanficdownloader/adapters/adapter_tthfanficorg.py b/fanficdownloader/adapters/adapter_tthfanficorg.py index 3964172f..2da5e7c3 100644 --- a/fanficdownloader/adapters/adapter_tthfanficorg.py +++ b/fanficdownloader/adapters/adapter_tthfanficorg.py @@ -54,13 +54,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): return 'www.tthfanfic.org' def getSiteExampleURLs(self): - return "http://www.tthfanfic.org/Story-5583 http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm ttp://www.tthfanfic.org/T-526321777890480578489880055880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm" + return "http://www.tthfanfic.org/Story-5583 http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm http://www.tthfanfic.org/T-526321777890480578489880055880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm" # http://www.tthfanfic.org/T-526321777848988007890480555880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm # http://www.tthfanfic.org/Story-5583 # http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm + # http://www.tthfanfic.org/story.php?no=26093 def getSiteURLPattern(self): - return r"http://www.tthfanfic.org/(T-\d+/)?Story-(?P\d+)(-\d+)?(/.*)?$" + return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P\d+)(-\d+)?(/.*)?$" # tth won't send you future updates if you aren't 'caught up' # on the story. Login isn't required for F21, but logging in will @@ -151,13 +152,13 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): try: # going to pull part of the meta data from author list page. - logging.debug("author URL: "+self.story.getMetadata('authorUrl')) + logging.debug("**AUTHOR** URL: "+self.story.getMetadata('authorUrl')) authordata = self._fetchUrl(self.story.getMetadata('authorUrl')) authorsoup = bs.BeautifulSoup(authordata) # author can have several pages, scan until we find it. while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ): nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href'] - logging.debug("author nextpage URL: "+nextpage) + logging.debug("**AUTHOR** nextpage URL: "+nextpage) authordata = self._fetchUrl(nextpage) authorsoup = bs.BeautifulSoup(authordata) except urllib2.HTTPError, e: