Plugin: Fix Update Always when the site has fewer chapters than the existing epub. Don't update the Date

column when doing Calibre MetaData Only, or Update Always with no new chapters. Set Language to English if it's not already set. Add some additional logging. Support the alternate story.php?no= URL form for TTH (tthfanfic.org).
2026-01-05 15:44:14 +01:00 · 2012-01-16 23:34:10 -06:00 · 2012-01-16 23:34:10 -06:00 · 45382ad424
commit 45382ad424
parent 37fce63735
4 changed files with 32 additions and 15 deletions
--- a/calibre-plugin/init.py
+++ b/calibre-plugin/init.py
@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
    description         = 'UI plugin to download FanFiction stories from various sites.'
    supported_platforms = ['windows', 'osx', 'linux']
    author              = 'Jim Miller'
-    version             = (1, 2, 0)
+    version             = (1, 2, 1)
    minimum_calibre_version = (0, 8, 30)

    #: This field defines the GUI plugin class that contains all the code
--- a/calibre-plugin/ffdl_plugin.py
+++ b/calibre-plugin/ffdl_plugin.py
@ -372,6 +372,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
        
        url = book['url']
        print("url:%s"%url)
+        skip_date_update = False
        
        ## was self.ffdlconfig, but we need to be able to change it
        ## when doing epub update.
@ -409,8 +410,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
        book['author_sort'] = book['author'] = story.getMetadata("author", removeallentities=True)
        book['publisher'] = story.getMetadata("site")
        book['tags'] = writer.getTags()
-        book['pubdate'] = story.getMetadataRaw('datePublished')
-        book['timestamp'] = story.getMetadataRaw('dateCreated')
        book['comments'] = story.getMetadata("description") #, removeallentities=True) comments handles entities better.
        
        # adapter.opener is the element with a threadlock.  But del
@ -478,7 +477,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
            
            ## newer/chaptercount checks are the same for both:
            # Update epub, but only if more chapters.
-            if collision == UPDATE:
+            if collision in (UPDATE,UPDATEALWAYS): # collision == UPDATE
                # 'book' can exist without epub.  If there's no existing epub,
                # let it go and it will download it.
                if db.has_format(book_id,fileform,index_is_id=True):
@ -489,12 +488,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
                                                     titlenavpoints=False,
                                                     striptitletoc=True,
                                                     forceunique=False)
-    
                    urlchaptercount = int(story.getMetadata('numChapters'))
-                    if chaptercount == urlchaptercount: # and not onlyoverwriteifnewer:
-                        raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png')
+                    if chaptercount == urlchaptercount:
+                        if collision == UPDATE:
+                            raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png')
+                        else:
+                            # UPDATEALWAYS
+                            skip_date_update = True
                    elif chaptercount > urlchaptercount:
-                        raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d." % (chaptercount,urlchaptercount),'dialog_error.png')
+                        raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png')
    
            if collision == OVERWRITE and \
                    db.has_format(book_id,formmapping[fileform],index_is_id=True):
@ -515,6 +517,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
                print("existing epub tmp:"+tmp.name)
                book['epub_for_update'] = tmp.name

+        book['pubdate'] = story.getMetadataRaw('datePublished')
+
+        if collision != CALIBREONLY and not skip_date_update:
+            # I'm half convinced this should be dateUpdated instead, but
+            # this behavior matches how epubs come out when imported
+            # dateCreated == packaged--epub/etc created.
+            book['timestamp'] = story.getMetadataRaw('dateCreated')
+        else:
+            book['timestamp'] = None
+        
        if book['good']: # there shouldn't be any !'good' books at this point.
            # if still 'good', make a temp file to write the output to.
            tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'],
@ -700,7 +712,10 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
                    old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags)
                    # mi.tags needs to be list, but set kills dups.
                mi.tags = list(set(list(old_tags)+mi.tags)) 
-                        
+        # Set language english, but only if not already set.
+        oldmi = db.get_metadata(book_id,index_is_id=True)
+        if not oldmi.languages:
+            mi.languages=['eng']
        db.set_metadata(book_id,mi)

    def _get_clean_reading_lists(self,lists):
@ -772,7 +787,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
        mi.set_identifiers({'url':book['url']})
        mi.publisher = book['publisher']
        mi.tags = book['tags']
-        #mi.languages = ['en']
+        #mi.languages = ['en'] # handled in _update_metadata so it can check for existing lang.
        mi.pubdate = book['pubdate']
        mi.timestamp = book['timestamp']
        mi.comments = book['comments']
@ -861,7 +876,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
            if db.has_format(book_id,'EPUB',index_is_id=True):
                existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True)
                mi = get_metadata(existingepub,'EPUB')
-                #print("mi:%s"%mi)
                identifiers = mi.get_identifiers()
                if 'url' in identifiers:
                    #print("url from epub:"+identifiers['url'].replace('|',':'))
--- a/calibre-plugin/jobs.py
+++ b/calibre-plugin/jobs.py
@ -44,8 +44,10 @@ def do_download_worker(book_list, options,
    print(options['version'])
    total = 0
    # Queue all the jobs
+    print("Adding jobs for URLs:")
    for book in book_list:
        if book['good']:
+            print("%s"%book['url'])
            total += 1
            args = ['calibre_plugins.fanfictiondownloader_plugin.jobs',
                    'do_download_for_worker',
--- a/fanficdownloader/adapters/adapter_tthfanficorg.py
+++ b/fanficdownloader/adapters/adapter_tthfanficorg.py
@ -54,13 +54,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
        return 'www.tthfanfic.org'

    def getSiteExampleURLs(self):
-        return "http://www.tthfanfic.org/Story-5583 http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm ttp://www.tthfanfic.org/T-526321777890480578489880055880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm"
+        return "http://www.tthfanfic.org/Story-5583 http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm http://www.tthfanfic.org/T-526321777890480578489880055880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm"

    # http://www.tthfanfic.org/T-526321777848988007890480555880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm
    # http://www.tthfanfic.org/Story-5583
    # http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm
+    # http://www.tthfanfic.org/story.php?no=26093
    def getSiteURLPattern(self):
-        return r"http://www.tthfanfic.org/(T-\d+/)?Story-(?P<id>\d+)(-\d+)?(/.*)?$"
+        return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$"

    # tth won't send you future updates if you aren't 'caught up'
    # on the story.  Login isn't required for F21, but logging in will
@ -151,13 +152,13 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):

        try:
            # going to pull part of the meta data from author list page.
-            logging.debug("author URL: "+self.story.getMetadata('authorUrl'))
+            logging.debug("**AUTHOR** URL: "+self.story.getMetadata('authorUrl'))
            authordata = self._fetchUrl(self.story.getMetadata('authorUrl'))
            authorsoup = bs.BeautifulSoup(authordata)
            # author can have several pages, scan until we find it.
            while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
                nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href']
-                logging.debug("author nextpage URL: "+nextpage)
+                logging.debug("**AUTHOR** nextpage URL: "+nextpage)
                authordata = self._fetchUrl(nextpage)
                authorsoup = bs.BeautifulSoup(authordata)
        except urllib2.HTTPError, e: