diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py
index 4ea3269e..11160770 100644
--- a/calibre-plugin/ffdl_plugin.py
+++ b/calibre-plugin/ffdl_plugin.py
@@ -732,6 +732,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
options['version'] = self.version
logger.debug(self.version)
+ options['personal.ini'] = get_ffdl_personalini()
#print("prep_downloads:%s"%books)
@@ -825,8 +826,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
skip_date_update = False
- options['personal.ini'] = get_ffdl_personalini()
adapter = get_ffdl_adapter(url,fileform)
+ ## save and share cookiejar and pagecache between all
+ ## downloads.
+ if 'pagecache' not in options:
+ options['pagecache'] = adapter.get_empty_pagecache()
+ adapter.set_pagecache(options['pagecache'])
+ if 'cookiejar' not in options:
+ options['cookiejar'] = adapter.get_empty_cookiejar()
+ adapter.set_cookiejar(options['cookiejar'])
+
# reduce foreground sleep time for ffnet when few books.
if 'ffnetcount' in options and \
adapter.getConfig('tweak_fg_sleep') and \
@@ -844,7 +853,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
## or a couple tries of one or the other
for x in range(0,2):
try:
- adapter.getStoryMetadataOnly()
+ adapter.getStoryMetadataOnly(get_cover=False)
except exceptions.FailedToLogin, f:
logger.warn("Login Failed, Need Username/Password.")
userpass = UserPassDialog(self.gui,url,f)
@@ -860,7 +869,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
adapter.is_adult=True
# let other exceptions percolate up.
- story = adapter.getStoryMetadataOnly()
+ story = adapter.getStoryMetadataOnly(get_cover=False)
series = story.getMetadata('series')
if not merge and series and prefs['checkforseriesurlid']:
@@ -1088,7 +1097,18 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
dir=options['tdir'])
logger.debug("title:"+book['title'])
logger.debug("outfile:"+tmp.name)
- book['outfile'] = tmp.name
+ book['outfile'] = tmp.name
+
+ # cookiejar = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
+ # suffix='.cookiejar',
+ # dir=options['tdir'])
+ # adapter.save_cookiejar(cookiejar.name)
+ # book['cookiejar'] = cookiejar.name
+ # pagecache = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
+ # suffix='.pagecache',
+ # dir=options['tdir'])
+ # adapter.save_pagecache(pagecache.name)
+ # book['pagecache'] = pagecache.name
return
@@ -1145,7 +1165,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
_('FFDL log'), _('FFDL download ended'), msg,
show_copy_button=False)
return
-
+
+ cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar',
+ dir=options['tdir'])
+ options['cookiejar'].save(cookiejarfile.name,
+ ignore_discard=True,
+ ignore_expires=True)
+ options['cookiejarfile']=cookiejarfile.name
+ del options['cookiejar'] ## can't be pickled.
+
func = 'arbitrary_n'
cpus = self.gui.job_manager.server.pool_size
args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_worker',
@@ -1464,7 +1492,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
elif prefs['autoconvert']:
## 'Convert Book'.auto_convert_auto_add doesn't convert if
## the format is already there.
- fmt = calibre_prefs['output_format'].upper() # formmapping is upper.
+ fmt = calibre_prefs['output_format']
# delete if there, but not if the format we just made.
if fmt != formmapping[options['fileform']] and \
db.has_format(book_id,fmt,index_is_id=True):
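
Taken together, the ffdl_plugin.py hunks above make every foreground metadata fetch share a single cookiejar and page cache, then persist only the cookiejar to disk before the background job is queued (the LWPCookieJar itself can't be pickled into the job args, while the plain-dict page cache can). A minimal sketch of that flow, with placeholder names for the story list and temp-file path; the adapter methods are the ones added in base_adapter.py further down:

    # Sketch only: shared cookiejar/pagecache across the foreground fetches,
    # then saving the cookiejar by filename for the separate-process job.
    # story_urls, fileform and the temp path are placeholders, not plugin names.
    options = {}
    for url in story_urls:
        adapter = get_ffdl_adapter(url, fileform)       # one adapter per story
        if 'pagecache' not in options:
            options['pagecache'] = adapter.get_empty_pagecache()
        adapter.set_pagecache(options['pagecache'])     # same dict for every adapter
        if 'cookiejar' not in options:
            options['cookiejar'] = adapter.get_empty_cookiejar()
        adapter.set_cookiejar(options['cookiejar'])     # same LWPCookieJar, opener rebuilt
        adapter.getStoryMetadataOnly(get_cover=False)   # covers wait for the BG job

    # cookielib.LWPCookieJar saves/loads by filename, so the BG worker can reload it.
    options['cookiejar'].save('/tmp/ffdl.cookiejar',
                              ignore_discard=True, ignore_expires=True)
    options['cookiejarfile'] = '/tmp/ffdl.cookiejar'
    del options['cookiejar']    # the jar can't be pickled with the rest of options
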
diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py
index ee8a5f2b..fbae9c6c 100644
--- a/calibre-plugin/jobs.py
+++ b/calibre-plugin/jobs.py
@@ -19,6 +19,11 @@ from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from calibre.constants import numeric_version as calibre_version
+# for smarten punc
+from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
+from calibre.utils.logging import Log
+from collections import namedtuple
+
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
@@ -58,10 +63,6 @@ def do_download_worker(book_list, options,
done=None,
args=args)
job._book = book
- # job._book_id = book_id
- # job._title = title
- # job._modified_date = modified_date
- # job._existing_isbn = existing_isbn
server.add_job(job)
else:
# was already bad before the subprocess ever started.
@@ -69,7 +70,7 @@ def do_download_worker(book_list, options,
# This server is an arbitrary_n job, so there is a notifier available.
# Set the % complete to a small number to avoid the 'unavailable' indicator
- notification(0.01, 'Downloading FanFiction Stories')
+ notification(0.01, _('Downloading FanFiction Stories'))
# dequeue the job results as they arrive, saving the results
count = 0
@@ -81,24 +82,19 @@ def do_download_worker(book_list, options,
if not job.is_finished:
continue
# A job really finished. Get the information.
- output_book = job.result
- #print("output_book:%s"%output_book)
book_list.remove(job._book)
book_list.append(job.result)
book_id = job._book['calibre_id']
- #title = job._title
count = count + 1
notification(float(count)/total, '%d of %d stories finished downloading'%(count,total))
# Add this job's output to the current log
logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
logger.info(job.details)
-
-
if count >= total:
- logger.info("\nSuccessful:\n%s\n"%("\n".join([book['url'] for book in
+ logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join([book['url'] for book in
filter(lambda x: x['good'], book_list) ] ) ) )
- logger.info("\nUnsuccessful:\n%s\n"%("\n".join([book['url'] for book in
+ logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join([book['url'] for book in
filter(lambda x: not x['good'], book_list) ] ) ) )
break
@@ -109,11 +105,10 @@ def do_download_worker(book_list, options,
def do_download_for_worker(book,options,notification=lambda x,y:x):
'''
- Child job, to extract isbn from formats for this specific book,
- when run as a worker job
+ Child job, to download a story when run as a worker job
'''
try:
- book['comment'] = 'Download started...'
+ book['comment'] = _('Download started...')
configuration = get_ffdl_config(book['url'],
options['fileform'],
@@ -122,8 +117,8 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
configuration.set("overrides","never_make_cover","true")
- # images only for epub, even if the user mistakenly turned it
- # on else where.
+ # images only for epub, html, even if the user mistakenly
+ # turned it on elsewhere.
if options['fileform'] not in ("epub","html"):
configuration.set("overrides","include_images","false")
@@ -133,6 +128,10 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
adapter.password = book['password']
adapter.setChaptersRange(book['begin'],book['end'])
+ adapter.load_cookiejar(options['cookiejarfile'])
+ logger.debug("cookiejar:%s"%adapter.cookiejar)
+ adapter.set_pagecache(options['pagecache'])
+
story = adapter.getStoryMetadataOnly()
if 'calibre_series' in book:
adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
@@ -191,13 +190,13 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
# dup handling from ffdl_plugin needed for anthology updates.
if options['collision'] == UPDATE:
if chaptercount == urlchaptercount:
- book['comment']="Already contains %d chapters. Reuse as is."%chaptercount
+ book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
book['outfile'] = book['epub_for_update'] # for anthology merge ops.
return book
# dup handling from ffdl_plugin needed for anthology updates.
if chaptercount > urlchaptercount:
- raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png')
+ raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
if not (options['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \
and adapter.getConfig("do_update_hook"):
@@ -208,16 +207,12 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
writer.writeStory(outfilename=outfile, forceOverwrite=True)
- book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\
+ book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\
(options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
if options['smarten_punctuation'] and options['fileform'] == "epub" \
and calibre_version >= (0, 9, 39):
# do smarten_punctuation from calibre's polish feature
- from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
- from calibre.utils.logging import Log
- from collections import namedtuple
-
data = {'smarten_punctuation':True}
opts = ALL_OPTS.copy()
opts.update(data)
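
For context, the smarten-punctuation branch above goes on to drive calibre's polish machinery; the hunk only shows the option setup, so the call below is a hedged reconstruction from the imports now hoisted to the top of jobs.py (the polish() call shape and the Options namedtuple are assumptions, not shown in the diff):

    # Hedged sketch of the smarten_punctuation path (calibre >= 0.9.39).
    from collections import namedtuple
    from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
    from calibre.utils.logging import Log

    data = {'smarten_punctuation': True}
    opts = ALL_OPTS.copy()                  # ALL_OPTS: dict of polish option defaults
    opts.update(data)
    Options = namedtuple('Options', ' '.join(ALL_OPTS.keys()))
    # outfile: path to the epub written just above; assumed call shape for polish()
    polish({outfile: outfile}, Options(**opts), Log(), lambda msg: None)
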
diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py
index c6dbdc08..654f1ff5 100644
--- a/fanficdownloader/adapters/adapter_adastrafanficcom.py
+++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py
@@ -59,6 +59,13 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+ def use_pagecache(self):
+ '''
+ adapters that work with the page cache need to override
+ this and return True.
+ '''
+ return True
+
def extractChapterUrlsAndMetadata(self):
if self.is_adult or self.getConfig("is_adult"):
diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py
index fa683999..38844932 100644
--- a/fanficdownloader/adapters/adapter_fanfictionnet.py
+++ b/fanficdownloader/adapters/adapter_fanfictionnet.py
@@ -52,6 +52,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# latest chapter yet and going back to chapter 1 to pull the
# chapter list doesn't get the latest. So save and use the
# original URL given to pull chapter list & metadata.
+ # Not used by the plugin because the URL is normalized first
+ # to eliminate duplicate story URLs.
self.origurl = url
if "https://m." in self.origurl:
## accept m(mobile)url, but use www.
@@ -74,14 +76,23 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
- def _fetchUrl(self,url):
- time.sleep(1.0) ## ffnet(and, I assume, fpcom) tends to fail
- ## more if hit too fast. This is in
- ## additional to what ever the
- ## slow_down_sleep_time setting is.
- return BaseSiteAdapter._fetchUrl(self,url)
+ def _fetchUrl(self,url,parameters=None,extrasleep=1.0):
+ # time.sleep(1.0) ## ffnet(and, I assume, fpcom) tends to fail
+ # ## more if hit too fast. This is in
+ # ## additional to what ever the
+ # ## slow_down_sleep_time setting is.
+ return BaseSiteAdapter._fetchUrl(self,url,
+ parameters=parameters,
+ extrasleep=extrasleep)
- def extractChapterUrlsAndMetadata(self):
+ def use_pagecache(self):
+ '''
+ adapters that work with the page cache need to override
+ this and return True.
+ '''
+ return True
+
+ def doExtractChapterUrlsAndMetadata(self,get_cover=True):
# fetch the chapter. From that we will get almost all the
# metadata and chapter list
@@ -256,14 +267,15 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
else:
self.story.setMetadata('status', 'In-Progress')
- # Try the larger image first.
- try:
- img = soup.find('img',{'class':'lazy cimage'})
- self.setCoverImage(url,img['data-original'])
- except:
- img = soup.find('img',{'class':'cimage'})
- if img:
- self.setCoverImage(url,img['src'])
+ if get_cover:
+ # Try the larger image first.
+ try:
+ img = soup.find('img',{'class':'lazy cimage'})
+ self.setCoverImage(url,img['data-original'])
+ except:
+ img = soup.find('img',{'class':'cimage'})
+ if img:
+ self.setCoverImage(url,img['src'])
# Find the chapter selector
select = soup.find('select', { 'name' : 'chapter' } )
@@ -287,12 +299,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
return
def getChapterText(self, url):
- time.sleep(4.0) ## ffnet(and, I assume, fpcom) tends to fail
- ## more if hit too fast. This is in
- ## additional to what ever the
- ## slow_down_sleep_time setting is.
+ # time.sleep(4.0) ## ffnet(and, I assume, fpcom) tends to fail
+ # ## more if hit too fast. This is in
+ # ## additional to what ever the
+ # ## slow_down_sleep_time setting is.
logger.debug('Getting chapter text from: %s' % url)
- data = self._fetchUrl(url)
+ data = self._fetchUrl(url,extrasleep=4.0)
if "Please email this error message in full to support@fanfiction.com" in data:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)
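
The reason the ffnet adapter now passes its delays as extrasleep rather than calling time.sleep() directly is ordering: in the base_adapter.py changes further down, the sleep happens inside the fetch, after the page cache has been checked, so a cache hit costs no delay at all. A tiny standalone sketch of that ordering (the cache dict and fetched data are stand-ins, not the adapter's real members):

    # Sketch: extrasleep only costs time on a cache miss, because the sleep
    # happens after the cache check (mirrors _fetchUrlRaw in base_adapter.py).
    import time

    def fetch_with_throttle(cache, url, extrasleep=None, usecache=True):
        if usecache and url in cache:
            return cache[url]                 # hit: no sleep, no request
        if extrasleep:
            time.sleep(float(extrasleep))     # ffnet's per-chapter delay
        data = "page body for %s" % url       # placeholder for the real request
        cache[url] = data
        return data
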
diff --git a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py
index b146ec73..d7b00536 100644
--- a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py
+++ b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py
@@ -62,6 +62,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+ def use_pagecache(self):
+ '''
+ adapters that work with the page cache need to override
+ this and return True.
+ '''
+ return True
+
def needToLoginCheck(self, data):
if 'Registered Users Only' in data \
or 'There is no such account on our website' in data \
@@ -120,7 +127,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
url = self.url+'&index=1'+addurl
logger.debug("Changing URL: "+url)
self.performLogin(url)
- data = self._fetchUrl(url)
+ data = self._fetchUrl(url,usecache=False)
if "This story contains mature content which may include violence, sexual situations, and coarse language" in data:
raise exceptions.AdultCheckRequired(self.url)
diff --git a/fanficdownloader/adapters/adapter_tthfanficorg.py b/fanficdownloader/adapters/adapter_tthfanficorg.py
index 73ff270c..d9bc28b2 100644
--- a/fanficdownloader/adapters/adapter_tthfanficorg.py
+++ b/fanficdownloader/adapters/adapter_tthfanficorg.py
@@ -65,6 +65,13 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$"
+ def use_pagecache(self):
+ '''
+ adapters that work with the page cache need to override
+ this and return True.
+ '''
+ return True
+
# tth won't send you future updates if you aren't 'caught up'
# on the story. Login isn't required for F21, but logging in will
# mark stories you've downloaded as 'read' on tth.
@@ -136,13 +143,16 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
if self.is_adult or self.getConfig("is_adult"):
form = soup.find('form', {'id':'sitemaxratingform'})
- params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
- 'sitemaxrating':'5'}
- logger.info("Attempting to get rating cookie for %s" % url)
- data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
- # refetch story page.
- data = self._fetchUrl(url)
- soup = bs.BeautifulSoup(data)
+ # if is_adult and rating isn't already set to FR21, set it so.
+ if not form.find('option',{'value':'5'}).get('selected'):
+ params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
+ 'sitemaxrating':'5'}
+ logger.info("Attempting to get rating cookie for %s" % url)
+ data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
+ # refetch story page.
+ ## XXX - needs cache invalidate? Or at least check whether this needs doing...
+ data = self._fetchUrl(url,usecache=False)
+ soup = bs.BeautifulSoup(data)
if "NOTE: This story is rated FR21 which is above your chosen filter level." in data:
raise exceptions.AdultCheckRequired(self.url)
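
The usecache=False refetches above (tenhawkpresents and tthfanfic) exist because those requests follow a state-changing POST, a login or the FR21 rating-cookie form, so the copy already sitting in the page cache no longer matches what the site would serve. Bypassing the cache forces a real fetch, and since _fetchUrlRaw stores every real fetch, the stale entry is overwritten in the process. A short sketch with hypothetical helper names:

    # Sketch: bypass the cache after a POST that changes what the site serves.
    data = self._fetchUrl(url)                       # may come from / populate the cache
    if needs_adult_rating(data):                     # hypothetical check, for brevity
        self._postUrl(set_rating_url, params)        # server-side state changes here
        data = self._fetchUrl(url, usecache=False)   # forced refetch; cache entry replaced
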
diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index 5fab6a5e..ac443c5d 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -22,7 +22,9 @@ import logging
import urllib
import urllib2 as u2
import urlparse as up
+import cookielib as cl
from functools import partial
+import pickle
from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
@@ -70,6 +72,14 @@ class BaseSiteAdapter(Configurable):
def validateURL(self):
return re.match(self.getSiteURLPattern(), self.url)
+ @staticmethod
+ def get_empty_cookiejar():
+ return cl.LWPCookieJar()
+
+ @staticmethod
+ def get_empty_pagecache():
+ return {}
+
def __init__(self, configuration, url):
Configurable.__init__(self, configuration)
@@ -78,8 +88,9 @@ class BaseSiteAdapter(Configurable):
self.is_adult=False
self.override_sleep = None
-
- self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
+ self.cookiejar = self.get_empty_cookiejar()
+ self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor())
+ # self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
## Specific UA because too many sites are blocking the default python UA.
self.opener.addheaders = [('User-agent', self.getConfig('user_agent'))]
self.storyDone = False
@@ -95,6 +106,9 @@ class BaseSiteAdapter(Configurable):
self.oldcover = None # (data of existing cover html, data of existing cover image)
self.calibrebookmark = None
self.logfile = None
+
+ self.pagecache = self.get_empty_pagecache()
+
## order of preference for decoding.
self.decode = ["utf8",
"Windows-1252"] # 1252 is a superset of
@@ -106,8 +120,84 @@ class BaseSiteAdapter(Configurable):
if not self.validateURL():
raise InvalidStoryURL(url,
self.getSiteDomain(),
- self.getSiteExampleURLs())
+ self.getSiteExampleURLs())
+ def get_cookiejar(self):
+ return self.cookiejar
+
+ def set_cookiejar(self,cj):
+ self.cookiejar = cj
+ self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor())
+
+ def load_cookiejar(self,filename):
+ '''
+ Needs to be called after the adapter is created, but before any
+ fetches are done. Takes a file *name*.
+ '''
+ self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True)
+
+ # def save_cookiejar(self,filename):
+ # '''
+ # Assumed to be a FileCookieJar if self.cookiejar set.
+ # Takes file *name*.
+ # '''
+ # self.get_cookiejar().save(filename, ignore_discard=True, ignore_expires=True)
+
+ # def save_pagecache(self,filename):
+ # '''
+ # Writes pickle of pagecache to file *name*
+ # '''
+ # with open(filename, 'wb') as f:
+ # pickle.dump(self.get_pagecache(),
+ # f,protocol=pickle.HIGHEST_PROTOCOL)
+
+ # def load_pagecache(self,filename):
+ # '''
+ # Reads pickle of pagecache from file *name*
+ # '''
+ # with open(filename, 'rb') as f:
+ # self.set_pagecache(pickle.load(f))
+
+ def get_pagecache(self):
+ return self.pagecache
+
+ def set_pagecache(self,d):
+ self.pagecache=d
+
+ def _get_cachekey(self, url, parameters=None, headers=None):
+ keylist=[url]
+ if parameters != None:
+ keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items())))
+ if headers != None:
+ keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(headers.items())))
+ return '?'.join(keylist)
+
+ def _has_cachekey(self,cachekey):
+ return self.use_pagecache() and cachekey in self.get_pagecache()
+
+ def _get_from_pagecache(self,cachekey):
+ if self.use_pagecache():
+ return self.get_pagecache().get(cachekey)
+ else:
+ return None
+
+ def _set_to_pagecache(self,cachekey,data):
+ if self.use_pagecache():
+ self.get_pagecache()[cachekey] = data
+
+ def use_pagecache(self):
+ '''
+ adapters that work with the page cache need to override
+ this and return True.
+ '''
+ return False
+
+ # def story_load(self,filename):
+ # d = pickle.load(self.story.metadata,filename)
+ # self.story.metadata = d['metadata']
+ # self.chapterUrls = d['chapterlist']
+ # self.story.metadataDone = True
+
def _setURL(self,url):
self.url = url
self.parsedUrl = up.urlparse(url)
@@ -148,8 +238,25 @@ class BaseSiteAdapter(Configurable):
return "".join([x for x in data if ord(x) < 128])
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
- def _postUrl(self, url, parameters={}, headers={}):
- self.do_sleep()
+ def _postUrl(self, url,
+ parameters={},
+ headers={},
+ extrasleep=None,
+ usecache=True):
+ '''
+ When should the cache be cleared or not used? Logins...
+
+ extrasleep is primarily for the ffnet adapter, which needs extra
+ sleeps. It's passed into the fetch so it can be skipped on
+ cache hits.
+ '''
+ cachekey=self._get_cachekey(url, parameters, headers)
+ if usecache and self._has_cachekey(cachekey):
+ logger.info("#####################################\npagecache HIT: %s"%cachekey)
+ return self._get_from_pagecache(cachekey)
+
+ logger.info("#####################################\npagecache MISS: %s"%cachekey)
+ self.do_sleep(extrasleep)
## u2.Request assumes POST when data!=None. Also assumes data
## is application/x-www-form-urlencoded.
@@ -160,41 +267,69 @@ class BaseSiteAdapter(Configurable):
req = u2.Request(url,
data=urllib.urlencode(parameters),
headers=headers)
- return self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
+ data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
+ self._set_to_pagecache(cachekey,data)
+ return data
- def _fetchUrlRaw(self, url, parameters=None):
+ def _fetchUrlRaw(self, url,
+ parameters=None,
+ extrasleep=None,
+ usecache=True):
+ '''
+ When should the cache be cleared or not used? Logins...
+
+ extrasleep is primarily for the ffnet adapter, which needs extra
+ sleeps. It's passed into the fetch so it can be skipped on
+ cache hits.
+ '''
+ cachekey=self._get_cachekey(url, parameters)
+ if usecache and self._has_cachekey(cachekey):
+ logger.info("#####################################\npagecache HIT: %s"%cachekey)
+ return self._get_from_pagecache(cachekey)
+
+ logger.info("#####################################\npagecache MISS: %s"%cachekey)
+ self.do_sleep(extrasleep)
if parameters != None:
- return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read()
+ data = self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read()
else:
- return self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read()
+ data = self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read()
+ self._set_to_pagecache(cachekey,data)
+ return data
def set_sleep(self,val):
print("\n===========\n set sleep time %s\n==========="%val)
self.override_sleep = val
- def do_sleep(self):
+ def do_sleep(self,extrasleep=None):
+ if extrasleep:
+ time.sleep(float(extrasleep))
if self.override_sleep:
time.sleep(float(self.override_sleep))
elif self.getConfig('slow_down_sleep_time'):
time.sleep(float(self.getConfig('slow_down_sleep_time')))
# parameters is a dict()
- def _fetchUrl(self, url, parameters=None):
- self.do_sleep()
+ def _fetchUrl(self, url,
+ parameters=None,
+ usecache=True,
+ extrasleep=None):
excpt=None
for sleeptime in [0, 0.5, 4, 9]:
time.sleep(sleeptime)
try:
- return self._decode(self._fetchUrlRaw(url,parameters))
+ return self._decode(self._fetchUrlRaw(url,
+ parameters=parameters,
+ usecache=usecache,
+ extrasleep=extrasleep))
except u2.HTTPError, he:
excpt=he
if he.code == 404:
logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(he)))
break # break out on 404
- except Exception, e:
- excpt=e
- logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
+ # except Exception, e:
+ # excpt=e
+ # logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
logger.error("Giving up on %s" %url)
logger.exception(excpt)
@@ -210,7 +345,7 @@ class BaseSiteAdapter(Configurable):
# Does the download the first time it's called.
def getStory(self):
if not self.storyDone:
- self.getStoryMetadataOnly()
+ self.getStoryMetadataOnly(get_cover=True)
for index, (title,url) in enumerate(self.chapterUrls):
if (self.chapterFirst!=None and index < self.chapterFirst) or \
@@ -253,9 +388,9 @@ class BaseSiteAdapter(Configurable):
return self.story
- def getStoryMetadataOnly(self):
+ def getStoryMetadataOnly(self,get_cover=True):
if not self.metadataDone:
- self.extractChapterUrlsAndMetadata()
+ self.doExtractChapterUrlsAndMetadata(get_cover=get_cover)
if not self.story.getMetadataRaw('dateUpdated'):
self.story.setMetadata('dateUpdated',self.story.getMetadataRaw('datePublished'))
@@ -304,6 +439,15 @@ class BaseSiteAdapter(Configurable):
"""
return 'no such example'
+ def doExtractChapterUrlsAndMetadata(self,get_cover=True):
+ '''
+ There are a handful of adapters that fetch a cover image while
+ collecting metadata. That isn't needed when *just* collecting
+ metadata in the plugin's foreground (FG) pass. Those few will
+ override this instead of extractChapterUrlsAndMetadata().
+ '''
+ return self.extractChapterUrlsAndMetadata()
+
def extractChapterUrlsAndMetadata(self):
"Needs to be overriden in each adapter class. Populates self.story metadata and self.chapterUrls"
pass
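
Putting the base_adapter.py pieces together: the cache key is the URL joined with its sorted POST parameters, lookups are gated on use_pagecache(), and every real fetch writes back into whatever dict was handed to set_pagecache(), which is how the plugin shares one cache across adapters. A standalone sketch of just that logic (the network fetch is faked for illustration; names mirror the diff but this is not the plugin's code):

    # Standalone sketch of the page-cache behaviour added to BaseSiteAdapter.
    class CachingFetcher(object):
        def __init__(self, pagecache=None):
            self.pagecache = pagecache if pagecache is not None else {}

        def use_pagecache(self):
            return True                         # adapters opt in by returning True

        def _get_cachekey(self, url, parameters=None):
            keylist = [url]
            if parameters is not None:          # sorted so dict ordering can't change the key
                keylist.append('&'.join('%s=%s' % (k, v)
                                        for k, v in sorted(parameters.items())))
            return '?'.join(keylist)

        def fetch(self, url, parameters=None, usecache=True):
            key = self._get_cachekey(url, parameters)
            if usecache and self.use_pagecache() and key in self.pagecache:
                return self.pagecache[key]              # cache HIT
            data = "<html>body of %s</html>" % url      # stand-in for the real request
            if self.use_pagecache():
                self.pagecache[key] = data              # store after every real fetch
            return data

    shared = {}                                         # one dict shared by all adapters
    a, b = CachingFetcher(shared), CachingFetcher(shared)
    a.fetch("http://example.com/viewstory.php?sid=1")   # MISS: fetched and cached
    b.fetch("http://example.com/viewstory.php?sid=1")   # HIT: served from the shared cache
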