diff --git a/calibre-plugin/fff_plugin.py b/calibre-plugin/fff_plugin.py index ed8e73c7..dad7ccb4 100644 --- a/calibre-plugin/fff_plugin.py +++ b/calibre-plugin/fff_plugin.py @@ -30,7 +30,7 @@ from calibre.constants import numeric_version as calibre_version from calibre.ptempfile import PersistentTemporaryFile, PersistentTemporaryDirectory, remove_dir from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.meta import get_metadata +from calibre.ebooks.metadata.meta import get_metadata as calibre_get_metadata from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dialog from calibre.gui2.dialogs.message_box import ViewLog from calibre.gui2.dialogs.confirm_delete import confirm @@ -55,20 +55,34 @@ try: except: HAS_CALGC=False -from calibre_plugins.fanficfare_plugin.common_utils import (set_plugin_icon_resources, get_icon, - create_menu_action_unique, get_library_uuid) +from calibre_plugins.fanficfare_plugin.common_utils import ( + set_plugin_icon_resources, get_icon, create_menu_action_unique, + get_library_uuid) -from calibre_plugins.fanficfare_plugin.fanficfare import adapters, exceptions -from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html -from calibre_plugins.fanficfare_plugin.fanficfare.geturls import get_urls_from_page, get_urls_from_html, get_urls_from_text, get_urls_from_imap +from calibre_plugins.fanficfare_plugin.fanficfare import ( + adapters, exceptions) + +from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import ( + get_dcsource, get_dcsource_chaptercount, get_story_url_from_html, + get_epub_metadata) + +from calibre_plugins.fanficfare_plugin.fanficfare.geturls import ( + get_urls_from_page, get_urls_from_html,get_urls_from_text, + get_urls_from_imap) + +from calibre_plugins.fanficfare_plugin.fff_util import ( + get_fff_adapter, get_fff_config, get_fff_personalini) + +from calibre_plugins.fanficfare_plugin.config 
import ( + permitted_values, rejecturllist) + +from calibre_plugins.fanficfare_plugin.prefs import ( + prefs, SAVE_YES, SAVE_NO, SAVE_YES_IF_IMG, SAVE_YES_UNLESS_IMG) -from calibre_plugins.fanficfare_plugin.fff_util import (get_fff_adapter, get_fff_config, get_fff_personalini) -from calibre_plugins.fanficfare_plugin.config import (permitted_values, rejecturllist) -from calibre_plugins.fanficfare_plugin.prefs import (prefs, SAVE_YES, SAVE_NO, - SAVE_YES_IF_IMG, SAVE_YES_UNLESS_IMG) from calibre_plugins.fanficfare_plugin.dialogs import ( AddNewDialog, UpdateExistingDialog, - LoopProgressDialog, UserPassDialog, AboutDialog, CollectURLDialog, RejectListDialog, EmailPassDialog, + LoopProgressDialog, UserPassDialog, AboutDialog, CollectURLDialog, + RejectListDialog, EmailPassDialog, OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, NotGoingToDownload, RejectUrlEntry ) @@ -876,8 +890,6 @@ class FanFicFarePlugin(InterfaceAction): # book has already been flagged bad for whatever reason. return - skip_date_update = False - adapter = get_fff_adapter(url,fileform) ## save and share cookiejar and pagecache between all ## downloads. @@ -888,67 +900,88 @@ class FanFicFarePlugin(InterfaceAction): options['cookiejar'] = adapter.get_empty_cookiejar() adapter.set_cookiejar(options['cookiejar']) - # reduce foreground sleep time for ffnet when few books. 
- if 'ffnetcount' in options and \ - adapter.getConfig('tweak_fg_sleep') and \ - adapter.getSiteDomain() == 'www.fanfiction.net': - minslp = float(adapter.getConfig('min_fg_sleep')) - maxslp = float(adapter.getConfig('max_fg_sleep')) - dwnlds = float(adapter.getConfig('max_fg_sleep_at_downloads')) - m = (maxslp-minslp) / (dwnlds-1) - b = minslp - m - slp = min(maxslp,m*float(options['ffnetcount'])+b) - #print("m:%s b:%s = %s"%(m,b,slp)) - adapter.set_sleep(slp) - - ## three tries, that's enough if both user/pass & is_adult needed, - ## or a couple tries of one or the other - for x in range(0,2): - try: - adapter.getStoryMetadataOnly(get_cover=False) - except exceptions.FailedToLogin, f: - logger.warn("Login Failed, Need Username/Password.") - userpass = UserPassDialog(self.gui,url,f) - userpass.exec_() # exec_ will make it act modal - if userpass.status: - adapter.username = userpass.user.text() - adapter.password = userpass.passwd.text() - - except exceptions.AdultCheckRequired: - if question_dialog(self.gui, _('Are You an Adult?'), '
'+ - _("%s requires that you be an adult. Please confirm you are an adult in your locale:")%url, - show_copy_button=False): - adapter.is_adult=True - - # let other exceptions percolate up. - story = adapter.getStoryMetadataOnly(get_cover=False) - - series = story.getMetadata('series') - if not merge and series and prefs['checkforseriesurlid']: - # try to find *series anthology* by *seriesUrl* identifier url or uri first. - searchstr = self.make_id_searchstr(story.getMetadata('seriesUrl')) - identicalbooks = db.search_getting_ids(searchstr, None) - # print("searchstr:%s"%searchstr) - # print("identicalbooks:%s"%identicalbooks) - if len(identicalbooks) > 0 and question_dialog(self.gui, _('Skip Story?'),''' -
%s
-%s
-%s
- '''%( - _('Skip Anthology Story?'), - _('"%s" is in series "%s" that you have an anthology book for.')%(story.getMetadata('title'),story.getMetadata('seriesUrl'),series[:series.index(' [')]), - _("Click 'Yes' to Skip."), - _("Click 'No' to download anyway.")), - show_copy_button=False): - book['comment'] = _("Story in Series Anthology(%s).")%series - book['title'] = story.getMetadata('title') - book['author'] = [story.getMetadata('author')] - book['good']=False - book['icon']='rotate-right.png' - book['status'] = _('Skipped') - return + ## XXX get_epub_metadata works, but how to use it? + if 1==0 and collision in (CALIBREONLY) and \ + fileform == 'epub' and \ + db.has_format(book['calibre_id'],'EPUB',index_is_id=True): + adapter.setStoryMetadata(get_epub_metadata(StringIO(db.format(book['calibre_id'],'EPUB', + index_is_id=True)))) + # let other exceptions percolate up. + story = adapter.getStoryMetadataOnly(get_cover=False) + else: + # reduce foreground sleep time for ffnet when few books. 
+ if 'ffnetcount' in options and \ + adapter.getConfig('tweak_fg_sleep') and \ + adapter.getSiteDomain() == 'www.fanfiction.net': + minslp = float(adapter.getConfig('min_fg_sleep')) + maxslp = float(adapter.getConfig('max_fg_sleep')) + dwnlds = float(adapter.getConfig('max_fg_sleep_at_downloads')) + m = (maxslp-minslp) / (dwnlds-1) + b = minslp - m + slp = min(maxslp,m*float(options['ffnetcount'])+b) + #print("m:%s b:%s = %s"%(m,b,slp)) + adapter.set_sleep(slp) + + ## three tries, that's enough if both user/pass & is_adult needed, + ## or a couple tries of one or the other + for x in range(0,2): + try: + adapter.getStoryMetadataOnly(get_cover=False) + except exceptions.FailedToLogin, f: + logger.warn("Login Failed, Need Username/Password.") + userpass = UserPassDialog(self.gui,url,f) + userpass.exec_() # exec_ will make it act modal + if userpass.status: + adapter.username = userpass.user.text() + adapter.password = userpass.passwd.text() + + except exceptions.AdultCheckRequired: + if question_dialog(self.gui, _('Are You an Adult?'), ''+ + _("%s requires that you be an adult. Please confirm you are an adult in your locale:")%url, + show_copy_button=False): + adapter.is_adult=True + + # let other exceptions percolate up. + story = adapter.getStoryMetadataOnly(get_cover=False) + + series = story.getMetadata('series') + if not merge and series and prefs['checkforseriesurlid']: + # try to find *series anthology* by *seriesUrl* identifier url or uri first. + searchstr = self.make_id_searchstr(story.getMetadata('seriesUrl')) + identicalbooks = db.search_getting_ids(searchstr, None) + # print("searchstr:%s"%searchstr) + # print("identicalbooks:%s"%identicalbooks) + if len(identicalbooks) > 0 and question_dialog(self.gui, _('Skip Story?'),''' +
%s
+%s
+%s
+ '''%( + _('Skip Anthology Story?'), + _('"%s" is in series "%s" that you have an anthology book for.')%(story.getMetadata('title'),story.getMetadata('seriesUrl'),series[:series.index(' [')]), + _("Click 'Yes' to Skip."), + _("Click 'No' to download anyway.")), + show_copy_button=False): + book['comment'] = _("Story in Series Anthology(%s).")%series + book['title'] = story.getMetadata('title') + book['author'] = [story.getMetadata('author')] + book['good']=False + book['icon']='rotate-right.png' + book['status'] = _('Skipped') + return + ## if existing book, populate existing custom column values in + ## metadata. + if 'calibre_id' in book: + book['calibre_columns']={} + for key, column in self.gui.library_view.model().custom_columns.iteritems(): + val = db.get_custom(book['calibre_id'], + label=column['label'], + index_is_id=True) + if val: + #print("(%s)->(%s)"%('calibre.'+key,val)) + # name: calibre.cust.namewithouthash + book['calibre_columns']['calibre_cust_'+key[1:]]={'val':val,'label':column['name']} ################################################################################################################################################33 @@ -1099,9 +1132,6 @@ class FanFicFarePlugin(InterfaceAction): if chaptercount == urlchaptercount: if collision == UPDATE: raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png') - else: - # UPDATEALWAYS - skip_date_update = True elif chaptercount > urlchaptercount: raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. 
Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png') elif chaptercount == 0: @@ -1151,17 +1181,6 @@ class FanFicFarePlugin(InterfaceAction): logger.debug("outfile:"+tmp.name) book['outfile'] = tmp.name - # cookiejar = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100], - # suffix='.cookiejar', - # dir=options['tdir']) - # adapter.save_cookiejar(cookiejar.name) - # book['cookiejar'] = cookiejar.name - # pagecache = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100], - # suffix='.pagecache', - # dir=options['tdir']) - # adapter.save_pagecache(pagecache.name) - # book['pagecache'] = pagecache.name - return def start_download_job(self,book_list, @@ -1763,7 +1782,7 @@ class FanFicFarePlugin(InterfaceAction): or (prefs['updatecalcover'] == SAVE_YES_IF_IMG ## yes, if image. and book['all_metadata']['cover_image'] )): # in ('specific','first','default','old') existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True) - epubmi = get_metadata(existingepub,'EPUB') + epubmi = calibre_get_metadata(existingepub,'EPUB') if epubmi.cover_data[1] is not None: try: db.set_cover(book_id, epubmi.cover_data[1]) @@ -1834,9 +1853,9 @@ class FanFicFarePlugin(InterfaceAction): if cover_generated and prefs['gc_polish_cover'] and \ options['fileform'] == "epub": # set cover inside epub from calibre's polish feature + from collections import namedtuple from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS from calibre.utils.logging import Log - from collections import namedtuple # Couldn't find a better way to get the cover path. 
cover_path = os.path.join(db.library_path, @@ -2049,7 +2068,7 @@ class FanFicFarePlugin(InterfaceAction): existingepub = None if path == None and db.has_format(book_id,'EPUB',index_is_id=True): existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True) - mi = get_metadata(existingepub,'EPUB') + mi = calibre_get_metadata(existingepub,'EPUB') identifiers = mi.get_identifiers() if 'url' in identifiers: # print("url from get_metadata:%s"%identifiers['url'].replace('|',':')) diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index 97d376e3..f2b411ef 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -11,6 +11,7 @@ import logging logger = logging.getLogger(__name__) import time, traceback +from StringIO import StringIO from calibre.utils.ipc.server import Server from calibre.utils.ipc.job import ParallelJob @@ -162,14 +163,7 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): # preserve logfile even on overwrite. if 'epub_for_update' in book: - (urlignore, - chaptercountignore, - oldchaptersignore, - oldimgsignore, - oldcoverignore, - calibrebookmarkignore, - # only logfile set in adapter, so others aren't used. - adapter.logfile) = get_update_data(book['epub_for_update']) + adapter.logfile = get_update_data(book['epub_for_update']).logfile # change the existing entries id to notid so # write_epub writes a whole new set to indicate overwrite. 
@@ -177,6 +171,7 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): adapter.logfile = adapter.logfile.replace("span id","span notid") logger.info("write to %s"%outfile) + inject_cal_cols(book,story,configuration) writer.writeStory(outfilename=outfile, forceOverwrite=True) book['comment'] = 'Download %s completed, %s chapters.'%(options['fileform'],story.getMetadata("numChapters")) book['all_metadata'] = story.getAllMetadata(removeallentities=True) @@ -193,7 +188,7 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): adapter.oldimgs, adapter.oldcover, adapter.calibrebookmark, - adapter.logfile) = get_update_data(book['epub_for_update']) + adapter.logfile) = get_update_data(book['epub_for_update'])[0:7] # dup handling from fff_plugin needed for anthology updates. if options['collision'] == UPDATE: @@ -214,6 +209,7 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): logger.info("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)) logger.info("write to %s"%outfile) + inject_cal_cols(book,story,configuration) writer.writeStory(outfilename=outfile, forceOverwrite=True) book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\ @@ -253,3 +249,22 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): #time.sleep(10) return book + +## calibre's columns for an existing book are passed in and injected +## into the story's metadata. For convenience, we also add labels and +## valid_entries for them in a special [injected] section that has +## even less precedence than [defaults] +def inject_cal_cols(book,story,configuration): + configuration.remove_section('injected') + if 'calibre_columns' in book: + injectini = ['[injected]'] + extra_valid = [] + for k, v in book['calibre_columns'].iteritems(): + story.setMetadata(k,v['val']) + injectini.append('%s_label:%s'%(k,v['label'])) + extra_valid.append(k) + if extra_valid: # if empty, there's nothing to add. 
+ injectini.append("add_to_extra_valid_entries:,"+','.join(extra_valid)) + configuration.readfp(StringIO('\n'.join(injectini))) + #print("added:\n%s\n"%('\n'.join(injectini))) + diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py index 6ef784ce..edcb4559 100644 --- a/fanficfare/adapters/base_adapter.py +++ b/fanficfare/adapters/base_adapter.py @@ -404,6 +404,12 @@ class BaseSiteAdapter(Configurable): self.metadataDone = True return self.story + def setStoryMetadata(self,metadata): + self.story.metadata = metadata + self.metadataDone = True + if not self.story.getMetadataRaw('dateUpdated'): + self.story.setMetadata('dateUpdated',self.story.getMetadataRaw('datePublished')) + def hookForUpdates(self,chaptercount): "Usually not needed." return chaptercount diff --git a/fanficfare/cli.py b/fanficfare/cli.py index b4cb4501..a14f9de3 100644 --- a/fanficfare/cli.py +++ b/fanficfare/cli.py @@ -318,7 +318,13 @@ def do_download(arg, # update now handled by pre-populating the old # images and chapters in the adapter rather than # merging epubs. 
- url, chaptercount, adapter.oldchapters, adapter.oldimgs, adapter.oldcover, adapter.calibrebookmark, adapter.logfile = get_update_data(output_filename) + (url, + chaptercount, + adapter.oldchapters, + adapter.oldimgs, + adapter.oldcover, + adapter.calibrebookmark, + adapter.logfile) = (get_update_data(output_filename))[0:7] print 'Do update - epub(%d) vs url(%d)' % (chaptercount, urlchaptercount) diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index 6cb96467..69057ea3 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -304,7 +304,8 @@ class Configuration(ConfigParser.SafeConfigParser): self.linenos=dict() # key by section or section,key -> lineno - self.sectionslist = ['defaults'] + ## [injected] section has even less priority than [defaults] + self.sectionslist = ['defaults','injected'] if site.startswith("www."): sitewith = site diff --git a/fanficfare/epubutils.py b/fanficfare/epubutils.py index 0436656f..c5560a8e 100644 --- a/fanficfare/epubutils.py +++ b/fanficfare/epubutils.py @@ -10,17 +10,27 @@ __docformat__ = 'restructuredtext en' import logging logger = logging.getLogger(__name__) -import re, os, traceback +from collections import namedtuple + +import re, os, traceback, json, datetime from zipfile import ZipFile from xml.dom.minidom import parseString import bs4 as bs +UpdateData = namedtuple('UpdateData', + 'source filecount soups images oldcover ' + 'calibrebookmark logfile metadata') + def get_dcsource(inputio): - return get_update_data(inputio,getfilecount=False,getsoups=False)[0] + return get_update_data(inputio,getfilecount=False,getsoups=False).source def get_dcsource_chaptercount(inputio): - return get_update_data(inputio,getfilecount=True,getsoups=False)[:2] # (source,filecount) + nt = get_update_data(inputio,getfilecount=True,getsoups=False) + return (nt.source,nt.filecount) + +def get_epub_metadata(inputio): + return get_update_data(inputio,getfilecount=False,getsoups=False).metadata def 
get_update_data(inputio, getfilecount=True, @@ -42,7 +52,7 @@ def get_update_data(inputio, ## Save the path to the .opf file--hrefs inside it are relative to it. relpath = get_path_part(rootfilename) - + oldcover = None calibrebookmark = None logfile = None @@ -136,19 +146,33 @@ def get_update_data(inputio, for skip in soup.findAll(attrs={'class':'skip_on_ffdl_update'}): skip.extract() - + soups.append(soup) - + filecount+=1 try: calibrebookmark = epub.read("META-INF/calibre_bookmarks.txt") except: pass - + + + metadata = {} + try: + for meta in firstmetadom.getElementsByTagName("meta"): + if meta.getAttribute("name")=="fanficfare:story_metadata": + #print("meta.getAttribute(content):%s"%meta.getAttribute("content")) + metadata=jsonloads(meta.getAttribute("content")) + except Exception as e: + pass + # logger.info("metadata %s not found") + # logger.info("Exception: %s"%(unicode(e))) + # traceback.print_exc() + #for k in images.keys(): #print("\tlongdesc:%s\n\tData len:%s\n"%(k,len(images[k]))) - return (source,filecount,soups,images,oldcover,calibrebookmark,logfile) + return UpdateData(source,filecount,soups,images,oldcover, + calibrebookmark,logfile,metadata) def get_path_part(n): relpath = os.path.dirname(n) @@ -172,7 +196,7 @@ def get_story_url_from_html(inputio,_is_good_url=None): ## Save the path to the .opf file--hrefs inside it are relative to it. relpath = get_path_part(rootfilename) - + # spin through the manifest--only place there are item tags. for item in contentdom.getElementsByTagName("item"): # First, count the 'chapter' files. FFF uses file0000.xhtml, @@ -192,3 +216,37 @@ def get_story_url_from_html(inputio,_is_good_url=None): if _is_good_url == None or _is_good_url(ahref): return ahref return None + +## why json doesn't define a date/time format is beyond me... 
+def datetime_decoder(d): + if isinstance(d, list): + pairs = enumerate(d) + elif isinstance(d, dict): + pairs = d.items() + result = [] + for k,v in pairs: + if isinstance(v, basestring): + try: + # The %f format code is only supported in Python >= 2.6. + # For Python <= 2.5 strip off microseconds + # v = datetime.datetime.strptime(v.rsplit('.', 1)[0], + # '%Y-%m-%dT%H:%M:%S') + v = datetime.datetime.strptime(v, '%Y-%m-%dT%H:%M:%S.%f') + except ValueError: + try: + v = datetime.datetime.strptime(v, '%Y-%m-%dT%H:%M:%S') + except ValueError: + try: + v = datetime.datetime.strptime(v, '%Y-%m-%d') + except ValueError: + pass + elif isinstance(v, (dict, list)): + v = datetime_decoder(v) + result.append((k, v)) + if isinstance(d, list): + return [x[1] for x in result] + elif isinstance(d, dict): + return dict(result) + +def jsonloads(obj): + return json.loads(obj, object_hook=datetime_decoder) diff --git a/fanficfare/writers/writer_epub.py b/fanficfare/writers/writer_epub.py index 07a9654a..e6df3655 100644 --- a/fanficfare/writers/writer_epub.py +++ b/fanficfare/writers/writer_epub.py @@ -22,6 +22,8 @@ import zipfile from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED import urllib import re +import json +import datetime ## XML isn't as forgiving as HTML, so rather than generate as strings, ## use DOM to generate the XML files. @@ -46,7 +48,7 @@ class EpubWriter(BaseStoryWriter): BaseStoryWriter.__init__(self, config, story) self.EPUB_CSS = string.Template('''${output_css}''') - + self.EPUB_TITLE_PAGE_START = string.Template(''' @@ -120,7 +122,7 @@ ${value}''') - + self.EPUB_LOG_ENTRY = string.Template(''' ${label}: ${value} ''') - + self.EPUB_LOG_UPDATE_END = string.Template('''