diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index 4ea3269e..11160770 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -732,6 +732,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): options['version'] = self.version logger.debug(self.version) + options['personal.ini'] = get_ffdl_personalini() #print("prep_downloads:%s"%books) @@ -825,8 +826,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction): skip_date_update = False - options['personal.ini'] = get_ffdl_personalini() adapter = get_ffdl_adapter(url,fileform) + ## save and share cookiejar and pagecache between all + ## downloads. + if 'pagecache' not in options: + options['pagecache'] = adapter.get_empty_pagecache() + adapter.set_pagecache(options['pagecache']) + if 'cookiejar' not in options: + options['cookiejar'] = adapter.get_empty_cookiejar() + adapter.set_cookiejar(options['cookiejar']) + # reduce foreground sleep time for ffnet when few books. if 'ffnetcount' in options and \ adapter.getConfig('tweak_fg_sleep') and \ @@ -844,7 +853,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): ## or a couple tries of one or the other for x in range(0,2): try: - adapter.getStoryMetadataOnly() + adapter.getStoryMetadataOnly(get_cover=False) except exceptions.FailedToLogin, f: logger.warn("Login Failed, Need Username/Password.") userpass = UserPassDialog(self.gui,url,f) @@ -860,7 +869,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): adapter.is_adult=True # let other exceptions percolate up. - story = adapter.getStoryMetadataOnly() + story = adapter.getStoryMetadataOnly(get_cover=False) series = story.getMetadata('series') if not merge and series and prefs['checkforseriesurlid']: @@ -1088,7 +1097,18 @@ class FanFictionDownLoaderPlugin(InterfaceAction): dir=options['tdir']) logger.debug("title:"+book['title']) logger.debug("outfile:"+tmp.name) - book['outfile'] = tmp.name + book['outfile'] = tmp.name + + # cookiejar = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100], + # suffix='.cookiejar', + # dir=options['tdir']) + # adapter.save_cookiejar(cookiejar.name) + # book['cookiejar'] = cookiejar.name + # pagecache = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100], + # suffix='.pagecache', + # dir=options['tdir']) + # adapter.save_pagecache(pagecache.name) + # book['pagecache'] = pagecache.name return @@ -1145,7 +1165,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction): _('FFDL log'), _('FFDL download ended'), msg, show_copy_button=False) return - + + cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar', + dir=options['tdir']) + options['cookiejar'].save(cookiejarfile.name, + ignore_discard=True, + ignore_expires=True) + options['cookiejarfile']=cookiejarfile.name + del options['cookiejar'] ## can't be pickled. + func = 'arbitrary_n' cpus = self.gui.job_manager.server.pool_size args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_worker', @@ -1464,7 +1492,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): elif prefs['autoconvert']: ## 'Convert Book'.auto_convert_auto_add doesn't convert if ## the format is already there. - fmt = calibre_prefs['output_format'].upper() # formmapping is upper. + fmt = calibre_prefs['output_format'] # delete if there, but not if the format we just made. 
if fmt != formmapping[options['fileform']] and \ db.has_format(book_id,fmt,index_is_id=True): diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index ee8a5f2b..fbae9c6c 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -19,6 +19,11 @@ from calibre.utils.ipc.server import Server from calibre.utils.ipc.job import ParallelJob from calibre.constants import numeric_version as calibre_version +# for smarten punc +from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS +from calibre.utils.logging import Log +from collections import namedtuple + from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload, OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY) from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions @@ -58,10 +63,6 @@ def do_download_worker(book_list, options, done=None, args=args) job._book = book - # job._book_id = book_id - # job._title = title - # job._modified_date = modified_date - # job._existing_isbn = existing_isbn server.add_job(job) else: # was already bad before the subprocess ever started. @@ -69,7 +70,7 @@ def do_download_worker(book_list, options, # This server is an arbitrary_n job, so there is a notifier available. # Set the % complete to a small number to avoid the 'unavailable' indicator - notification(0.01, 'Downloading FanFiction Stories') + notification(0.01, _('Downloading FanFiction Stories')) # dequeue the job results as they arrive, saving the results count = 0 @@ -81,24 +82,19 @@ def do_download_worker(book_list, options, if not job.is_finished: continue # A job really finished. Get the information. - output_book = job.result - #print("output_book:%s"%output_book) book_list.remove(job._book) book_list.append(job.result) book_id = job._book['calibre_id'] - #title = job._title count = count + 1 notification(float(count)/total, '%d of %d stories finished downloading'%(count,total)) # Add this job's output to the current log logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title'])) logger.info(job.details) - - if count >= total: - logger.info("\nSuccessful:\n%s\n"%("\n".join([book['url'] for book in + logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join([book['url'] for book in filter(lambda x: x['good'], book_list) ] ) ) ) - logger.info("\nUnsuccessful:\n%s\n"%("\n".join([book['url'] for book in + logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join([book['url'] for book in filter(lambda x: not x['good'], book_list) ] ) ) ) break @@ -109,11 +105,10 @@ def do_download_worker(book_list, options, def do_download_for_worker(book,options,notification=lambda x,y:x): ''' - Child job, to extract isbn from formats for this specific book, - when run as a worker job + Child job, to download story when run as a worker job ''' try: - book['comment'] = 'Download started...' + book['comment'] = _('Download started...') configuration = get_ffdl_config(book['url'], options['fileform'], @@ -122,8 +117,8 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS): configuration.set("overrides","never_make_cover","true") - # images only for epub, even if the user mistakenly turned it - # on else where. + # images only for epub, html, even if the user mistakenly + # turned it on else where. 
if options['fileform'] not in ("epub","html"): configuration.set("overrides","include_images","false") @@ -133,6 +128,10 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): adapter.password = book['password'] adapter.setChaptersRange(book['begin'],book['end']) + adapter.load_cookiejar(options['cookiejarfile']) + logger.debug("cookiejar:%s"%adapter.cookiejar) + adapter.set_pagecache(options['pagecache']) + story = adapter.getStoryMetadataOnly() if 'calibre_series' in book: adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1]) @@ -191,13 +190,13 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): # dup handling from ffdl_plugin needed for anthology updates. if options['collision'] == UPDATE: if chaptercount == urlchaptercount: - book['comment']="Already contains %d chapters. Reuse as is."%chaptercount + book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount book['outfile'] = book['epub_for_update'] # for anthology merge ops. return book # dup handling from ffdl_plugin needed for anthology updates. if chaptercount > urlchaptercount: - raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png') + raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png') if not (options['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \ and adapter.getConfig("do_update_hook"): @@ -208,16 +207,12 @@ def do_download_for_worker(book,options,notification=lambda x,y:x): writer.writeStory(outfilename=outfile, forceOverwrite=True) - book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\ + book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\ (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount) if options['smarten_punctuation'] and options['fileform'] == "epub" \ and calibre_version >= (0, 9, 39): # do smarten_punctuation from calibre's polish feature - from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS - from calibre.utils.logging import Log - from collections import namedtuple - data = {'smarten_punctuation':True} opts = ALL_OPTS.copy() opts.update(data) diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py index c6dbdc08..654f1ff5 100644 --- a/fanficdownloader/adapters/adapter_adastrafanficcom.py +++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py @@ -59,6 +59,13 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. + ''' + return True + def extractChapterUrlsAndMetadata(self): if self.is_adult or self.getConfig("is_adult"): diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index fa683999..38844932 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -52,6 +52,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # latest chapter yet and going back to chapter 1 to pull the # chapter list doesn't get the latest. So save and use the # original URL given to pull chapter list & metadata. 
+ # Not used by plugin because URL gets normalized first for + # eliminating duplicate story urls. self.origurl = url if "https://m." in self.origurl: ## accept m(mobile)url, but use www. @@ -74,14 +76,23 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$" - def _fetchUrl(self,url): - time.sleep(1.0) ## ffnet(and, I assume, fpcom) tends to fail - ## more if hit too fast. This is in - ## additional to what ever the - ## slow_down_sleep_time setting is. - return BaseSiteAdapter._fetchUrl(self,url) + def _fetchUrl(self,url,parameters=None,extrasleep=1.0): + # time.sleep(1.0) ## ffnet(and, I assume, fpcom) tends to fail + # ## more if hit too fast. This is in + # ## additional to what ever the + # ## slow_down_sleep_time setting is. + return BaseSiteAdapter._fetchUrl(self,url, + parameters=parameters, + extrasleep=extrasleep) - def extractChapterUrlsAndMetadata(self): + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. + ''' + return True + + def doExtractChapterUrlsAndMetadata(self,get_cover=True): # fetch the chapter. From that we will get almost all the # metadata and chapter list @@ -256,14 +267,15 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): else: self.story.setMetadata('status', 'In-Progress') - # Try the larger image first. - try: - img = soup.find('img',{'class':'lazy cimage'}) - self.setCoverImage(url,img['data-original']) - except: - img = soup.find('img',{'class':'cimage'}) - if img: - self.setCoverImage(url,img['src']) + if get_cover: + # Try the larger image first. + try: + img = soup.find('img',{'class':'lazy cimage'}) + self.setCoverImage(url,img['data-original']) + except: + img = soup.find('img',{'class':'cimage'}) + if img: + self.setCoverImage(url,img['src']) # Find the chapter selector select = soup.find('select', { 'name' : 'chapter' } ) @@ -287,12 +299,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): return def getChapterText(self, url): - time.sleep(4.0) ## ffnet(and, I assume, fpcom) tends to fail - ## more if hit too fast. This is in - ## additional to what ever the - ## slow_down_sleep_time setting is. + # time.sleep(4.0) ## ffnet(and, I assume, fpcom) tends to fail + # ## more if hit too fast. This is in + # ## additional to what ever the + # ## slow_down_sleep_time setting is. logger.debug('Getting chapter text from: %s' % url) - data = self._fetchUrl(url) + data = self._fetchUrl(url,extrasleep=4.0) if "Please email this error message in full to support@fanfiction.com" in data: raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url) diff --git a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py index b146ec73..d7b00536 100644 --- a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py +++ b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py @@ -62,6 +62,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. 
+ ''' + return True + def needToLoginCheck(self, data): if 'Registered Users Only' in data \ or 'There is no such account on our website' in data \ @@ -120,7 +127,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): url = self.url+'&index=1'+addurl logger.debug("Changing URL: "+url) self.performLogin(url) - data = self._fetchUrl(url) + data = self._fetchUrl(url,usecache=False) if "This story contains mature content which may include violence, sexual situations, and coarse language" in data: raise exceptions.AdultCheckRequired(self.url) diff --git a/fanficdownloader/adapters/adapter_tthfanficorg.py b/fanficdownloader/adapters/adapter_tthfanficorg.py index 73ff270c..d9bc28b2 100644 --- a/fanficdownloader/adapters/adapter_tthfanficorg.py +++ b/fanficdownloader/adapters/adapter_tthfanficorg.py @@ -65,6 +65,13 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P\d+)(-\d+)?(/.*)?$" + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. + ''' + return True + # tth won't send you future updates if you aren't 'caught up' # on the story. Login isn't required for F21, but logging in will # mark stories you've downloaded as 'read' on tth. @@ -136,13 +143,16 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): if self.is_adult or self.getConfig("is_adult"): form = soup.find('form', {'id':'sitemaxratingform'}) - params={'ctkn':form.find('input', {'name':'ctkn'})['value'], - 'sitemaxrating':'5'} - logger.info("Attempting to get rating cookie for %s" % url) - data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params) - # refetch story page. - data = self._fetchUrl(url) - soup = bs.BeautifulSoup(data) + # if is_adult and rating isn't already set to FR21, set it so. + if not form.find('option',{'value':'5'}).get('selected'): + params={'ctkn':form.find('input', {'name':'ctkn'})['value'], + 'sitemaxrating':'5'} + logger.info("Attempting to get rating cookie for %s" % url) + data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params) + # refetch story page. + ## XXX - needs cache invalidate? Or at least check that it this needs doing... + data = self._fetchUrl(url,usecache=False) + soup = bs.BeautifulSoup(data) if "NOTE: This story is rated FR21 which is above your chosen filter level." in data: raise exceptions.AdultCheckRequired(self.url) diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 5fab6a5e..ac443c5d 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -22,7 +22,9 @@ import logging import urllib import urllib2 as u2 import urlparse as up +import cookielib as cl from functools import partial +import pickle from .. 
import BeautifulSoup as bs from ..htmlcleanup import stripHTML @@ -70,6 +72,14 @@ class BaseSiteAdapter(Configurable): def validateURL(self): return re.match(self.getSiteURLPattern(), self.url) + @staticmethod + def get_empty_cookiejar(): + return cl.LWPCookieJar() + + @staticmethod + def get_empty_pagecache(): + return {} + def __init__(self, configuration, url): Configurable.__init__(self, configuration) @@ -78,8 +88,9 @@ class BaseSiteAdapter(Configurable): self.is_adult=False self.override_sleep = None - - self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor()) + self.cookiejar = self.get_empty_cookiejar() + self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor()) + # self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor()) ## Specific UA because too many sites are blocking the default python UA. self.opener.addheaders = [('User-agent', self.getConfig('user_agent'))] self.storyDone = False @@ -95,6 +106,9 @@ class BaseSiteAdapter(Configurable): self.oldcover = None # (data of existing cover html, data of existing cover image) self.calibrebookmark = None self.logfile = None + + self.pagecache = self.get_empty_pagecache() + ## order of preference for decoding. self.decode = ["utf8", "Windows-1252"] # 1252 is a superset of @@ -106,8 +120,84 @@ class BaseSiteAdapter(Configurable): if not self.validateURL(): raise InvalidStoryURL(url, self.getSiteDomain(), - self.getSiteExampleURLs()) + self.getSiteExampleURLs()) + def get_cookiejar(self): + return self.cookiejar + + def set_cookiejar(self,cj): + self.cookiejar = cj + self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor()) + + def load_cookiejar(self,filename): + ''' + Needs to be called after adapter create, but before any fetchs + are done. Takes file *name*. + ''' + self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True) + + # def save_cookiejar(self,filename): + # ''' + # Assumed to be a FileCookieJar if self.cookiejar set. + # Takes file *name*. + # ''' + # self.get_cookiejar().save(filename, ignore_discard=True, ignore_expires=True) + + # def save_pagecache(self,filename): + # ''' + # Writes pickle of pagecache to file *name* + # ''' + # with open(filename, 'wb') as f: + # pickle.dump(self.get_pagecache(), + # f,protocol=pickle.HIGHEST_PROTOCOL) + + # def load_pagecache(self,filename): + # ''' + # Reads pickle of pagecache from file *name* + # ''' + # with open(filename, 'rb') as f: + # self.set_pagecache(pickle.load(f)) + + def get_pagecache(self): + return self.pagecache + + def set_pagecache(self,d): + self.pagecache=d + + def _get_cachekey(self, url, parameters=None, headers=None): + keylist=[url] + if parameters != None: + keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items()))) + if headers != None: + keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(headers.items()))) + return '?'.join(keylist) + + def _has_cachekey(self,cachekey): + return self.use_pagecache() and cachekey in self.get_pagecache() + + def _get_from_pagecache(self,cachekey): + if self.use_pagecache(): + return self.get_pagecache().get(cachekey) + else: + return None + + def _set_to_pagecache(self,cachekey,data): + if self.use_pagecache(): + self.get_pagecache()[cachekey] = data + + def use_pagecache(self): + ''' + adapters that will work with the page cache need to implement + this and change it to True. 
+ ''' + return False + + # def story_load(self,filename): + # d = pickle.load(self.story.metadata,filename) + # self.story.metadata = d['metadata'] + # self.chapterUrls = d['chapterlist'] + # self.story.metadataDone = True + def _setURL(self,url): self.url = url self.parsedUrl = up.urlparse(url) @@ -148,8 +238,25 @@ class BaseSiteAdapter(Configurable): return "".join([x for x in data if ord(x) < 128]) # Assumes application/x-www-form-urlencoded. parameters, headers are dict()s - def _postUrl(self, url, parameters={}, headers={}): - self.do_sleep() + def _postUrl(self, url, + parameters={}, + headers={}, + extrasleep=None, + usecache=True): + ''' + When should cache be cleared or not used? logins... + + extrasleep is primarily for ffnet adapter which has extra + sleeps. Passed into fetchs so it can be bypassed when + cache hits. + ''' + cachekey=self._get_cachekey(url, parameters, headers) + if usecache and self._has_cachekey(cachekey): + logger.info("#####################################\npagecache HIT: %s"%cachekey) + return self._get_from_pagecache(cachekey) + + logger.info("#####################################\npagecache MISS: %s"%cachekey) + self.do_sleep(extrasleep) ## u2.Request assumes POST when data!=None. Also assumes data ## is application/x-www-form-urlencoded. @@ -160,41 +267,69 @@ class BaseSiteAdapter(Configurable): req = u2.Request(url, data=urllib.urlencode(parameters), headers=headers) - return self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read()) + data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read()) + self._set_to_pagecache(cachekey,data) + return data - def _fetchUrlRaw(self, url, parameters=None): + def _fetchUrlRaw(self, url, + parameters=None, + extrasleep=None, + usecache=True): + ''' + When should cache be cleared or not used? logins... + + extrasleep is primarily for ffnet adapter which has extra + sleeps. Passed into fetchs so it can be bypassed when + cache hits. 
+ ''' + cachekey=self._get_cachekey(url, parameters) + if usecache and self._has_cachekey(cachekey): + logger.info("#####################################\npagecache HIT: %s"%cachekey) + return self._get_from_pagecache(cachekey) + + logger.info("#####################################\npagecache MISS: %s"%cachekey) + self.do_sleep(extrasleep) if parameters != None: - return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read() + data = self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read() else: - return self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read() + data = self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read() + self._set_to_pagecache(cachekey,data) + return data def set_sleep(self,val): print("\n===========\n set sleep time %s\n==========="%val) self.override_sleep = val - def do_sleep(self): + def do_sleep(self,extrasleep=None): + if extrasleep: + time.sleep(float(extrasleep)) if self.override_sleep: time.sleep(float(self.override_sleep)) elif self.getConfig('slow_down_sleep_time'): time.sleep(float(self.getConfig('slow_down_sleep_time'))) # parameters is a dict() - def _fetchUrl(self, url, parameters=None): - self.do_sleep() + def _fetchUrl(self, url, + parameters=None, + usecache=True, + extrasleep=None): excpt=None for sleeptime in [0, 0.5, 4, 9]: time.sleep(sleeptime) try: - return self._decode(self._fetchUrlRaw(url,parameters)) + return self._decode(self._fetchUrlRaw(url, + parameters=parameters, + usecache=usecache, + extrasleep=extrasleep)) except u2.HTTPError, he: excpt=he if he.code == 404: logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(he))) break # break out on 404 - except Exception, e: - excpt=e - logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e))) + # except Exception, e: + # excpt=e + # logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e))) logger.error("Giving up on %s" %url) logger.exception(excpt) @@ -210,7 +345,7 @@ class BaseSiteAdapter(Configurable): # Does the download the first time it's called. def getStory(self): if not self.storyDone: - self.getStoryMetadataOnly() + self.getStoryMetadataOnly(get_cover=True) for index, (title,url) in enumerate(self.chapterUrls): if (self.chapterFirst!=None and index < self.chapterFirst) or \ @@ -253,9 +388,9 @@ class BaseSiteAdapter(Configurable): return self.story - def getStoryMetadataOnly(self): + def getStoryMetadataOnly(self,get_cover=True): if not self.metadataDone: - self.extractChapterUrlsAndMetadata() + self.doExtractChapterUrlsAndMetadata(get_cover=get_cover) if not self.story.getMetadataRaw('dateUpdated'): self.story.setMetadata('dateUpdated',self.story.getMetadataRaw('datePublished')) @@ -304,6 +439,15 @@ class BaseSiteAdapter(Configurable): """ return 'no such example' + def doExtractChapterUrlsAndMetadata(self,get_cover=True): + ''' + There are a handful of adapters that fetch a cover image while + collecting metadata. That isn't needed while *just* + collecting metadata in FG in plugin. Those few will override + this instead of extractChapterUrlsAndMetadata() + ''' + return self.extractChapterUrlsAndMetadata() + def extractChapterUrlsAndMetadata(self): "Needs to be overriden in each adapter class. Populates self.story metadata and self.chapterUrls" pass
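
A minimal sketch (not part of the patch) of how the plugin side is expected to wire up the shared pagecache and cookiejar introduced above. The method names are taken from the diff; get_ffdl_adapter(), the 'epub' fileform and the story_urls list stand in for the plugin's existing plumbing.

    options = {}
    for url in story_urls:                        # hypothetical list of story URLs
        adapter = get_ffdl_adapter(url, 'epub')
        # One pagecache and one cookiejar are created on the first pass and
        # shared by every later adapter, so repeated metadata fetches during
        # the foreground pass hit the cache instead of the site.
        if 'pagecache' not in options:
            options['pagecache'] = adapter.get_empty_pagecache()
        adapter.set_pagecache(options['pagecache'])
        if 'cookiejar' not in options:
            options['cookiejar'] = adapter.get_empty_cookiejar()
        adapter.set_cookiejar(options['cookiejar'])
        # The foreground pass only needs metadata, so skip the cover fetch.
        story = adapter.getStoryMetadataOnly(get_cover=False)

Before the background job is queued, the cookiejar is saved to a temporary file with LWPCookieJar.save() and removed from options (it can't be pickled across the job boundary); each worker then calls adapter.load_cookiejar(options['cookiejarfile']) and adapter.set_pagecache(options['pagecache']) before fetching, as in the jobs.py hunk above.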
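
Likewise, a sketch of the adapter-side opt-in, assuming a hypothetical site adapter derived from BaseSiteAdapter. An adapter declares that its pages are safe to reuse by overriding use_pagecache(), and passes usecache=False whenever a fetch must bypass or refresh the cache, e.g. after a login or rating-cookie POST changes what the server would return (the loginUrl/loginParams names below are illustrative only).

    class SomeSiteAdapter(BaseSiteAdapter):       # hypothetical adapter

        def use_pagecache(self):
            # Opt in to the shared page cache; BaseSiteAdapter defaults to False.
            return True

        def performLogin(self, url):
            # State-changing requests should not be served from the cache.
            self._postUrl(self.loginUrl, self.loginParams, usecache=False)

        def extractChapterUrlsAndMetadata(self):
            self.performLogin(self.url)
            # Force a refetch: any cached copy predates the login.
            data = self._fetchUrl(self.url, usecache=False)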