Plugin--Fix for Update Always with fewer chapters on site. Don't update the Date
column when doing Calibre MetaData Only or Update Always with no new chapters.
Set Language to English if it's not already set.  Add some additional logging.
Support other URL form for TTH.
This commit is contained in:
Jim Miller 2012-01-16 23:34:10 -06:00
parent 37fce63735
commit 45382ad424
4 changed files with 32 additions and 15 deletions

View file

@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
version = (1, 2, 0)
version = (1, 2, 1)
minimum_calibre_version = (0, 8, 30)
#: This field defines the GUI plugin class that contains all the code

View file

@ -372,6 +372,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
url = book['url']
print("url:%s"%url)
skip_date_update = False
## was self.ffdlconfig, but we need to be able to change it
## when doing epub update.
@ -409,8 +410,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['author_sort'] = book['author'] = story.getMetadata("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags()
book['pubdate'] = story.getMetadataRaw('datePublished')
book['timestamp'] = story.getMetadataRaw('dateCreated')
book['comments'] = story.getMetadata("description") #, removeallentities=True) comments handles entities better.
# adapter.opener is the element with a threadlock. But del
@ -478,7 +477,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
## newer/chaptercount checks are the same for both:
# Update epub, but only if more chapters.
if collision == UPDATE:
if collision in (UPDATE,UPDATEALWAYS): # collision == UPDATE
# 'book' can exist without epub. If there's no existing epub,
# let it go and it will download it.
if db.has_format(book_id,fileform,index_is_id=True):
@ -489,12 +488,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
titlenavpoints=False,
striptitletoc=True,
forceunique=False)
urlchaptercount = int(story.getMetadata('numChapters'))
if chaptercount == urlchaptercount: # and not onlyoverwriteifnewer:
raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png')
if chaptercount == urlchaptercount:
if collision == UPDATE:
raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png')
else:
# UPDATEALWAYS
skip_date_update = True
elif chaptercount > urlchaptercount:
raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d." % (chaptercount,urlchaptercount),'dialog_error.png')
raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png')
if collision == OVERWRITE and \
db.has_format(book_id,formmapping[fileform],index_is_id=True):
@ -515,6 +517,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
print("existing epub tmp:"+tmp.name)
book['epub_for_update'] = tmp.name
book['pubdate'] = story.getMetadataRaw('datePublished')
if collision != CALIBREONLY and not skip_date_update:
# I'm half convinced this should be dateUpdated instead, but
# this behavior matches how epubs come out when imported
# dateCreated == packaged--epub/etc created.
book['timestamp'] = story.getMetadataRaw('dateCreated')
else:
book['timestamp'] = None
if book['good']: # there shouldn't be any !'good' books at this point.
# if still 'good', make a temp file to write the output to.
tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'],
@ -700,7 +712,10 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags)
# mi.tags needs to be list, but set kills dups.
mi.tags = list(set(list(old_tags)+mi.tags))
# Set language english, but only if not already set.
oldmi = db.get_metadata(book_id,index_is_id=True)
if not oldmi.languages:
mi.languages=['eng']
db.set_metadata(book_id,mi)
def _get_clean_reading_lists(self,lists):
@ -772,7 +787,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
mi.set_identifiers({'url':book['url']})
mi.publisher = book['publisher']
mi.tags = book['tags']
#mi.languages = ['en']
#mi.languages = ['en'] # handled in _update_metadata so it can check for existing lang.
mi.pubdate = book['pubdate']
mi.timestamp = book['timestamp']
mi.comments = book['comments']
@ -861,7 +876,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
if db.has_format(book_id,'EPUB',index_is_id=True):
existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True)
mi = get_metadata(existingepub,'EPUB')
#print("mi:%s"%mi)
identifiers = mi.get_identifiers()
if 'url' in identifiers:
#print("url from epub:"+identifiers['url'].replace('|',':'))

View file

@ -44,8 +44,10 @@ def do_download_worker(book_list, options,
print(options['version'])
total = 0
# Queue all the jobs
print("Adding jobs for URLs:")
for book in book_list:
if book['good']:
print("%s"%book['url'])
total += 1
args = ['calibre_plugins.fanfictiondownloader_plugin.jobs',
'do_download_for_worker',

View file

@ -54,13 +54,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
return 'www.tthfanfic.org'
def getSiteExampleURLs(self):
return "http://www.tthfanfic.org/Story-5583 http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm ttp://www.tthfanfic.org/T-526321777890480578489880055880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm"
return "http://www.tthfanfic.org/Story-5583 http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm http://www.tthfanfic.org/T-526321777890480578489880055880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm"
# http://www.tthfanfic.org/T-526321777848988007890480555880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm
# http://www.tthfanfic.org/Story-5583
# http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm
# http://www.tthfanfic.org/story.php?no=26093
def getSiteURLPattern(self):
return r"http://www.tthfanfic.org/(T-\d+/)?Story-(?P<id>\d+)(-\d+)?(/.*)?$"
return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$"
# tth won't send you future updates if you aren't 'caught up'
# on the story. Login isn't required for F21, but logging in will
@ -151,13 +152,13 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
try:
# going to pull part of the meta data from author list page.
logging.debug("author URL: "+self.story.getMetadata('authorUrl'))
logging.debug("**AUTHOR** URL: "+self.story.getMetadata('authorUrl'))
authordata = self._fetchUrl(self.story.getMetadata('authorUrl'))
authorsoup = bs.BeautifulSoup(authordata)
# author can have several pages, scan until we find it.
while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href']
logging.debug("author nextpage URL: "+nextpage)
logging.debug("**AUTHOR** nextpage URL: "+nextpage)
authordata = self._fetchUrl(nextpage)
authorsoup = bs.BeautifulSoup(authordata)
except urllib2.HTTPError, e: