mirror of https://github.com/JimmXinu/FanFicFare.git
synced 2026-01-16 13:11:51 +01:00

Adding fetched file caching feature and optimizing hits for ffnet in particular.

This commit is contained in:
parent 5de217a0e3
commit 667c19ac3c

7 changed files with 280 additions and 77 deletions
@@ -732,6 +732,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
        options['version'] = self.version
        logger.debug(self.version)
        options['personal.ini'] = get_ffdl_personalini()

        #print("prep_downloads:%s"%books)
@@ -825,8 +826,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
        skip_date_update = False

        options['personal.ini'] = get_ffdl_personalini()
        adapter = get_ffdl_adapter(url,fileform)
+       ## save and share cookiejar and pagecache between all
+       ## downloads.
+       if 'pagecache' not in options:
+           options['pagecache'] = adapter.get_empty_pagecache()
+       adapter.set_pagecache(options['pagecache'])
+       if 'cookiejar' not in options:
+           options['cookiejar'] = adapter.get_empty_cookiejar()
+       adapter.set_cookiejar(options['cookiejar'])

        # reduce foreground sleep time for ffnet when few books.
        if 'ffnetcount' in options and \
                adapter.getConfig('tweak_fg_sleep') and \
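The intent of the block above: one pagecache (a plain dict) and one cookiejar are created lazily with the first adapter of a run and then attached to every adapter created afterwards, so later fetches of the same pages are served from memory. A minimal sketch of that wiring, using the names from this diff (make_shared_adapter is a hypothetical wrapper; get_ffdl_adapter and the options dict are assumed from the surrounding plugin code):

    # Sketch only: reuse one pagecache/cookiejar for every adapter in a run.
    def make_shared_adapter(url, fileform, options):
        adapter = get_ffdl_adapter(url, fileform)        # assumed plugin helper
        if 'pagecache' not in options:
            options['pagecache'] = adapter.get_empty_pagecache()   # {}
        if 'cookiejar' not in options:
            options['cookiejar'] = adapter.get_empty_cookiejar()   # cookielib.LWPCookieJar()
        adapter.set_pagecache(options['pagecache'])
        adapter.set_cookiejar(options['cookiejar'])
        return adapter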
@@ -844,7 +853,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
        ## or a couple tries of one or the other
        for x in range(0,2):
            try:
-               adapter.getStoryMetadataOnly()
+               adapter.getStoryMetadataOnly(get_cover=False)
            except exceptions.FailedToLogin, f:
                logger.warn("Login Failed, Need Username/Password.")
                userpass = UserPassDialog(self.gui,url,f)
@@ -860,7 +869,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
                adapter.is_adult=True

        # let other exceptions percolate up.
-       story = adapter.getStoryMetadataOnly()
+       story = adapter.getStoryMetadataOnly(get_cover=False)

        series = story.getMetadata('series')
        if not merge and series and prefs['checkforseriesurlid']:
@@ -1088,7 +1097,18 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
                                       dir=options['tdir'])
        logger.debug("title:"+book['title'])
        logger.debug("outfile:"+tmp.name)
        book['outfile'] = tmp.name

+       # cookiejar = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
+       #                                     suffix='.cookiejar',
+       #                                     dir=options['tdir'])
+       # adapter.save_cookiejar(cookiejar.name)
+       # book['cookiejar'] = cookiejar.name
+       # pagecache = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
+       #                                     suffix='.pagecache',
+       #                                     dir=options['tdir'])
+       # adapter.save_pagecache(pagecache.name)
+       # book['pagecache'] = pagecache.name

        return
@@ -1145,7 +1165,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
                            _('FFDL log'), _('FFDL download ended'), msg,
                            show_copy_button=False)
            return

+       cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar',
+                                               dir=options['tdir'])
+       options['cookiejar'].save(cookiejarfile.name,
+                                 ignore_discard=True,
+                                 ignore_expires=True)
+       options['cookiejarfile']=cookiejarfile.name
+       del options['cookiejar'] ## can't be pickled.

        func = 'arbitrary_n'
        cpus = self.gui.job_manager.server.pool_size
        args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_worker',
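The jar has to cross a process boundary here: background job arguments are pickled, and a live LWPCookieJar is not picklable, so the diff serializes it to a temporary file and passes only the file name. A sketch of that hand-off, assuming calibre's PersistentTemporaryFile and the options dict used above:

    from calibre.ptempfile import PersistentTemporaryFile

    # Sketch only: persist cookies gathered during the foreground pass.
    cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar', dir=options['tdir'])
    options['cookiejar'].save(cookiejarfile.name,
                              ignore_discard=True,    # keep session-only cookies
                              ignore_expires=True)    # keep expired ones too, just in case
    options['cookiejarfile'] = cookiejarfile.name
    del options['cookiejar']   # the live jar can't be pickled into the job args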
@@ -1464,7 +1492,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
            elif prefs['autoconvert']:
                ## 'Convert Book'.auto_convert_auto_add doesn't convert if
                ## the format is already there.
-               fmt = calibre_prefs['output_format'].upper() # formmapping is upper.
+               fmt = calibre_prefs['output_format']
                # delete if there, but not if the format we just made.
                if fmt != formmapping[options['fileform']] and \
                        db.has_format(book_id,fmt,index_is_id=True):
@@ -19,6 +19,11 @@ from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from calibre.constants import numeric_version as calibre_version

+# for smarten punc
+from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
+from calibre.utils.logging import Log
+from collections import namedtuple
+
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
                    OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
@@ -58,10 +63,6 @@ def do_download_worker(book_list, options,
                              done=None,
                              args=args)
            job._book = book
-           # job._book_id = book_id
-           # job._title = title
-           # job._modified_date = modified_date
-           # job._existing_isbn = existing_isbn
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
@@ -69,7 +70,7 @@ def do_download_worker(book_list, options,

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
-   notification(0.01, 'Downloading FanFiction Stories')
+   notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
@@ -81,24 +82,19 @@ def do_download_worker(book_list, options,
            if not job.is_finished:
                continue
            # A job really finished. Get the information.
            output_book = job.result
            #print("output_book:%s"%output_book)
            book_list.remove(job._book)
            book_list.append(job.result)
            book_id = job._book['calibre_id']
            #title = job._title
            count = count + 1
            notification(float(count)/total, '%d of %d stories finished downloading'%(count,total))
            # Add this job's output to the current log
            logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
            logger.info(job.details)

            if count >= total:
-               logger.info("\nSuccessful:\n%s\n"%("\n".join([book['url'] for book in
+               logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join([book['url'] for book in
                                                              filter(lambda x: x['good'], book_list) ] ) ) )
-               logger.info("\nUnsuccessful:\n%s\n"%("\n".join([book['url'] for book in
+               logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join([book['url'] for book in
                                                                filter(lambda x: not x['good'], book_list) ] ) ) )
                break
@@ -109,11 +105,10 @@ def do_download_worker(book_list, options,

def do_download_for_worker(book,options,notification=lambda x,y:x):
    '''
-   Child job, to extract isbn from formats for this specific book,
-   when run as a worker job
+   Child job, to download story when run as a worker job
    '''
    try:
-       book['comment'] = 'Download started...'
+       book['comment'] = _('Download started...')

        configuration = get_ffdl_config(book['url'],
                                        options['fileform'],
@@ -122,8 +117,8 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
        if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
            configuration.set("overrides","never_make_cover","true")

-       # images only for epub, even if the user mistakenly turned it
-       # on else where.
+       # images only for epub, html, even if the user mistakenly
+       # turned it on else where.
        if options['fileform'] not in ("epub","html"):
            configuration.set("overrides","include_images","false")
@@ -133,6 +128,10 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
            adapter.password = book['password']
        adapter.setChaptersRange(book['begin'],book['end'])

+       adapter.load_cookiejar(options['cookiejarfile'])
+       logger.debug("cookiejar:%s"%adapter.cookiejar)
+       adapter.set_pagecache(options['pagecache'])
+
        story = adapter.getStoryMetadataOnly()
        if 'calibre_series' in book:
            adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
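On the worker side the jar is rehydrated from that file before any fetch, and the pagecache dict (which does survive pickling) is attached directly. A sketch under the same naming assumptions, with adapters.getAdapter() taken to be the usual fanficdownloader factory:

    # Sketch only: per-book setup inside the worker function.
    adapter = adapters.getAdapter(configuration, book['url'])
    adapter.load_cookiejar(options['cookiejarfile'])   # must run before the first fetch
    adapter.set_pagecache(options['pagecache'])        # shared dict pickled into the job args
    story = adapter.getStoryMetadataOnly()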
@@ -191,13 +190,13 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
                # dup handling from ffdl_plugin needed for anthology updates.
                if options['collision'] == UPDATE:
                    if chaptercount == urlchaptercount:
-                       book['comment']="Already contains %d chapters. Reuse as is."%chaptercount
+                       book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
                        book['outfile'] = book['epub_for_update'] # for anthology merge ops.
                        return book

                # dup handling from ffdl_plugin needed for anthology updates.
                if chaptercount > urlchaptercount:
-                   raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png')
+                   raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')

                if not (options['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \
                        and adapter.getConfig("do_update_hook"):
@@ -208,16 +207,12 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):

                writer.writeStory(outfilename=outfile, forceOverwrite=True)

-               book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\
+               book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\
                    (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)

            if options['smarten_punctuation'] and options['fileform'] == "epub" \
                    and calibre_version >= (0, 9, 39):
                # do smarten_punctuation from calibre's polish feature
-               from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
-               from calibre.utils.logging import Log
-               from collections import namedtuple
-
                data = {'smarten_punctuation':True}
                opts = ALL_OPTS.copy()
                opts.update(data)
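The smarten-punctuation pass reuses calibre's polish machinery; the imports moved to module level above feed a call along these lines. This is a sketch: outfile is assumed to be the epub just written, logger the module logger, and the steps after opts.update() are assumed from calibre's polish API rather than shown in this hunk.

    from collections import namedtuple
    from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
    from calibre.utils.logging import Log

    # Sketch only: run just the smarten_punctuation step of calibre's polish.
    data = {'smarten_punctuation': True}
    opts = ALL_OPTS.copy()
    opts.update(data)
    O = namedtuple('Options', ' '.join(ALL_OPTS.keys()))  # polish() expects an options object
    opts = O(**opts)
    log = Log(level=Log.DEBUG)
    polish({outfile: outfile}, opts, log, logger.info)    # source and destination are the same file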
@@ -59,6 +59,13 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

+   def use_pagecache(self):
+       '''
+       adapters that will work with the page cache need to implement
+       this and change it to True.
+       '''
+       return True
+
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
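Opting a site into the new cache is deliberately a one-method change: the base class returns False from use_pagecache(), so adapters that have been checked against the cache just override it, as this file and the other adapters touched by this commit do. The pattern for any other adapter (class name hypothetical):

    class SomeOtherSiteAdapter(BaseSiteAdapter):   # hypothetical adapter
        def use_pagecache(self):
            '''
            adapters that will work with the page cache need to implement
            this and change it to True.
            '''
            return True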
@@ -52,6 +52,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
        # latest chapter yet and going back to chapter 1 to pull the
        # chapter list doesn't get the latest. So save and use the
        # original URL given to pull chapter list & metadata.
+       # Not used by plugin because URL gets normalized first for
+       # eliminating duplicate story urls.
        self.origurl = url
        if "https://m." in self.origurl:
            ## accept m(mobile)url, but use www.
@@ -74,14 +76,23 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"

-   def _fetchUrl(self,url):
-       time.sleep(1.0) ## ffnet(and, I assume, fpcom) tends to fail
-                       ## more if hit too fast. This is in
-                       ## additional to what ever the
-                       ## slow_down_sleep_time setting is.
-       return BaseSiteAdapter._fetchUrl(self,url)
+   def _fetchUrl(self,url,parameters=None,extrasleep=1.0):
+       # time.sleep(1.0) ## ffnet(and, I assume, fpcom) tends to fail
+       #                 ## more if hit too fast. This is in
+       #                 ## additional to what ever the
+       #                 ## slow_down_sleep_time setting is.
+       return BaseSiteAdapter._fetchUrl(self,url,
+                                        parameters=parameters,
+                                        extrasleep=extrasleep)

-   def extractChapterUrlsAndMetadata(self):
+   def use_pagecache(self):
+       '''
+       adapters that will work with the page cache need to implement
+       this and change it to True.
+       '''
+       return True
+
+   def doExtractChapterUrlsAndMetadata(self,get_cover=True):

        # fetch the chapter. From that we will get almost all the
        # metadata and chapter list
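The unconditional time.sleep() calls are traded for an extrasleep argument that rides along with the request, so the ffnet-specific delay is only paid when a page actually has to be fetched; a pagecache hit returns before do_sleep() ever runs. The chapter fetch later in this file becomes, in effect:

    # Sketch only: the extra 4 second ffnet delay now happens inside the fetch,
    # and is skipped entirely when the page is already in the pagecache.
    data = self._fetchUrl(url, extrasleep=4.0)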
@@ -256,14 +267,15 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
        else:
            self.story.setMetadata('status', 'In-Progress')

-       # Try the larger image first.
-       try:
-           img = soup.find('img',{'class':'lazy cimage'})
-           self.setCoverImage(url,img['data-original'])
-       except:
-           img = soup.find('img',{'class':'cimage'})
-           if img:
-               self.setCoverImage(url,img['src'])
+       if get_cover:
+           # Try the larger image first.
+           try:
+               img = soup.find('img',{'class':'lazy cimage'})
+               self.setCoverImage(url,img['data-original'])
+           except:
+               img = soup.find('img',{'class':'cimage'})
+               if img:
+                   self.setCoverImage(url,img['src'])

        # Find the chapter selector
        select = soup.find('select', { 'name' : 'chapter' } )
@@ -287,12 +299,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
        return

    def getChapterText(self, url):
-       time.sleep(4.0) ## ffnet(and, I assume, fpcom) tends to fail
-                       ## more if hit too fast. This is in
-                       ## additional to what ever the
-                       ## slow_down_sleep_time setting is.
+       # time.sleep(4.0) ## ffnet(and, I assume, fpcom) tends to fail
+       #                 ## more if hit too fast. This is in
+       #                 ## additional to what ever the
+       #                 ## slow_down_sleep_time setting is.
        logger.debug('Getting chapter text from: %s' % url)
-       data = self._fetchUrl(url)
+       data = self._fetchUrl(url,extrasleep=4.0)

        if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)
@@ -62,6 +62,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

+   def use_pagecache(self):
+       '''
+       adapters that will work with the page cache need to implement
+       this and change it to True.
+       '''
+       return True
+
    def needToLoginCheck(self, data):
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
@@ -120,7 +127,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
            url = self.url+'&index=1'+addurl
            logger.debug("Changing URL: "+url)
            self.performLogin(url)
-           data = self._fetchUrl(url)
+           data = self._fetchUrl(url,usecache=False)

        if "This story contains mature content which may include violence, sexual situations, and coarse language" in data:
            raise exceptions.AdultCheckRequired(self.url)
@@ -65,6 +65,13 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$"

+   def use_pagecache(self):
+       '''
+       adapters that will work with the page cache need to implement
+       this and change it to True.
+       '''
+       return True
+
    # tth won't send you future updates if you aren't 'caught up'
    # on the story. Login isn't required for F21, but logging in will
    # mark stories you've downloaded as 'read' on tth.
@@ -136,13 +143,16 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):

        if self.is_adult or self.getConfig("is_adult"):
            form = soup.find('form', {'id':'sitemaxratingform'})
-           params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
-                   'sitemaxrating':'5'}
-           logger.info("Attempting to get rating cookie for %s" % url)
-           data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
-           # refetch story page.
-           data = self._fetchUrl(url)
-           soup = bs.BeautifulSoup(data)
+           # if is_adult and rating isn't already set to FR21, set it so.
+           if not form.find('option',{'value':'5'}).get('selected'):
+               params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
+                       'sitemaxrating':'5'}
+               logger.info("Attempting to get rating cookie for %s" % url)
+               data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
+               # refetch story page.
+               ## XXX - needs cache invalidate? Or at least check that it this needs doing...
+               data = self._fetchUrl(url,usecache=False)
+               soup = bs.BeautifulSoup(data)

        if "NOTE: This story is rated FR21 which is above your chosen filter level." in data:
            raise exceptions.AdultCheckRequired(self.url)
@@ -22,7 +22,9 @@ import logging
import urllib
import urllib2 as u2
import urlparse as up
+import cookielib as cl
from functools import partial
+import pickle

from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
@@ -70,6 +72,14 @@ class BaseSiteAdapter(Configurable):
    def validateURL(self):
        return re.match(self.getSiteURLPattern(), self.url)

+   @staticmethod
+   def get_empty_cookiejar():
+       return cl.LWPCookieJar()
+
+   @staticmethod
+   def get_empty_pagecache():
+       return {}
+
    def __init__(self, configuration, url):
        Configurable.__init__(self, configuration)
@@ -78,8 +88,9 @@ class BaseSiteAdapter(Configurable):
        self.is_adult=False

        self.override_sleep = None

-       self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
+       self.cookiejar = self.get_empty_cookiejar()
+       self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor())
+       # self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
        ## Specific UA because too many sites are blocking the default python UA.
        self.opener.addheaders = [('User-agent', self.getConfig('user_agent'))]
        self.storyDone = False
@@ -95,6 +106,9 @@ class BaseSiteAdapter(Configurable):
        self.oldcover = None # (data of existing cover html, data of existing cover image)
        self.calibrebookmark = None
        self.logfile = None
+
+       self.pagecache = self.get_empty_pagecache()
+
        ## order of preference for decoding.
        self.decode = ["utf8",
                       "Windows-1252"] # 1252 is a superset of
@@ -106,8 +120,84 @@ class BaseSiteAdapter(Configurable):
        if not self.validateURL():
            raise InvalidStoryURL(url,
                                  self.getSiteDomain(),
                                  self.getSiteExampleURLs())

+   def get_cookiejar(self):
+       return self.cookiejar
+
+   def set_cookiejar(self,cj):
+       self.cookiejar = cj
+       self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor())
+
+   def load_cookiejar(self,filename):
+       '''
+       Needs to be called after adapter create, but before any fetchs
+       are done.  Takes file *name*.
+       '''
+       self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True)
+
+   # def save_cookiejar(self,filename):
+   #     '''
+   #     Assumed to be a FileCookieJar if self.cookiejar set.
+   #     Takes file *name*.
+   #     '''
+   #     self.get_cookiejar().save(filename, ignore_discard=True, ignore_expires=True)
+
+   # def save_pagecache(self,filename):
+   #     '''
+   #     Writes pickle of pagecache to file *name*
+   #     '''
+   #     with open(filename, 'wb') as f:
+   #         pickle.dump(self.get_pagecache(),
+   #                     f,protocol=pickle.HIGHEST_PROTOCOL)
+
+   # def load_pagecache(self,filename):
+   #     '''
+   #     Reads pickle of pagecache from file *name*
+   #     '''
+   #     with open(filename, 'rb') as f:
+   #         self.set_pagecache(pickle.load(f))
+
+   def get_pagecache(self):
+       return self.pagecache
+
+   def set_pagecache(self,d):
+       self.pagecache=d
+
+   def _get_cachekey(self, url, parameters=None, headers=None):
+       keylist=[url]
+       if parameters != None:
+           keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items())))
+       if headers != None:
+           keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(headers.items())))
+       return '?'.join(keylist)
+
+   def _has_cachekey(self,cachekey):
+       return self.use_pagecache() and cachekey in self.get_pagecache()
+
+   def _get_from_pagecache(self,cachekey):
+       if self.use_pagecache():
+           return self.get_pagecache().get(cachekey)
+       else:
+           return None
+
+   def _set_to_pagecache(self,cachekey,data):
+       if self.use_pagecache():
+           self.get_pagecache()[cachekey] = data
+
+   def use_pagecache(self):
+       '''
+       adapters that will work with the page cache need to implement
+       this and change it to True.
+       '''
+       return False
+
+   # def story_load(self,filename):
+   #     d = pickle.load(self.story.metadata,filename)
+   #     self.story.metadata = d['metadata']
+   #     self.chapterUrls = d['chapterlist']
+   #     self.story.metadataDone = True
+
    def _setURL(self,url):
        self.url = url
        self.parsedUrl = up.urlparse(url)
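The cache key folds in the URL plus any POST parameters and headers, so the same page requested with different form data cannot collide. A condensed sketch of the read/write path these helpers give the fetch methods below (_fetch_from_network is a placeholder, not a real method):

    # Sketch only, not the actual method bodies.
    cachekey = self._get_cachekey(url, parameters)     # e.g. "http://site/page?a=1&b=2"
    if usecache and self._has_cachekey(cachekey):      # only ever True when use_pagecache() is on
        return self._get_from_pagecache(cachekey)      # no sleep, no network
    self.do_sleep(extrasleep)                          # be polite only for real fetches
    data = self._fetch_from_network(url, parameters)   # placeholder for the opener call
    self._set_to_pagecache(cachekey, data)             # no-op unless use_pagecache() is True
    return data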
@@ -148,8 +238,25 @@ class BaseSiteAdapter(Configurable):
        return "".join([x for x in data if ord(x) < 128])

    # Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
-   def _postUrl(self, url, parameters={}, headers={}):
-       self.do_sleep()
+   def _postUrl(self, url,
+                parameters={},
+                headers={},
+                extrasleep=None,
+                usecache=True):
+       '''
+       When should cache be cleared or not used? logins...
+
+       extrasleep is primarily for ffnet adapter which has extra
+       sleeps.  Passed into fetchs so it can be bypassed when
+       cache hits.
+       '''
+       cachekey=self._get_cachekey(url, parameters, headers)
+       if usecache and self._has_cachekey(cachekey):
+           logger.info("#####################################\npagecache HIT: %s"%cachekey)
+           return self._get_from_pagecache(cachekey)
+
+       logger.info("#####################################\npagecache MISS: %s"%cachekey)
+       self.do_sleep(extrasleep)

        ## u2.Request assumes POST when data!=None.  Also assumes data
        ## is application/x-www-form-urlencoded.
@@ -160,41 +267,69 @@ class BaseSiteAdapter(Configurable):
        req = u2.Request(url,
                         data=urllib.urlencode(parameters),
                         headers=headers)
-       return self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
+       data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
+       self._set_to_pagecache(cachekey,data)
+       return data

-   def _fetchUrlRaw(self, url, parameters=None):
+   def _fetchUrlRaw(self, url,
+                    parameters=None,
+                    extrasleep=None,
+                    usecache=True):
+       '''
+       When should cache be cleared or not used? logins...
+
+       extrasleep is primarily for ffnet adapter which has extra
+       sleeps.  Passed into fetchs so it can be bypassed when
+       cache hits.
+       '''
+       cachekey=self._get_cachekey(url, parameters)
+       if usecache and self._has_cachekey(cachekey):
+           logger.info("#####################################\npagecache HIT: %s"%cachekey)
+           return self._get_from_pagecache(cachekey)
+
+       logger.info("#####################################\npagecache MISS: %s"%cachekey)
+       self.do_sleep(extrasleep)
        if parameters != None:
-           return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read()
+           data = self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read()
        else:
-           return self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read()
+           data = self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read()
+       self._set_to_pagecache(cachekey,data)
+       return data

    def set_sleep(self,val):
        print("\n===========\n set sleep time %s\n==========="%val)
        self.override_sleep = val

-   def do_sleep(self):
+   def do_sleep(self,extrasleep=None):
+       if extrasleep:
+           time.sleep(float(extrasleep))
        if self.override_sleep:
            time.sleep(float(self.override_sleep))
        elif self.getConfig('slow_down_sleep_time'):
            time.sleep(float(self.getConfig('slow_down_sleep_time')))

    # parameters is a dict()
-   def _fetchUrl(self, url, parameters=None):
-       self.do_sleep()
+   def _fetchUrl(self, url,
+                 parameters=None,
+                 usecache=True,
+                 extrasleep=None):

        excpt=None
        for sleeptime in [0, 0.5, 4, 9]:
            time.sleep(sleeptime)
            try:
-               return self._decode(self._fetchUrlRaw(url,parameters))
+               return self._decode(self._fetchUrlRaw(url,
+                                                     parameters=parameters,
+                                                     usecache=usecache,
+                                                     extrasleep=extrasleep))
            except u2.HTTPError, he:
                excpt=he
                if he.code == 404:
                    logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(he)))
                    break # break out on 404
-           except Exception, e:
-               excpt=e
-               logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
+           # except Exception, e:
+           #     excpt=e
+           #     logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))

        logger.error("Giving up on %s" %url)
        logger.exception(excpt)
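Put together, a cache-enabled adapter pays for a given URL once per run: the first call sleeps, fetches, retries on transient errors, and stores the result; later calls for the same key come straight out of the dict, and usecache=False forces a refetch when the page is known to have changed (after the login and rating POSTs seen in the adapters above). A usage sketch with hypothetical variable names:

    # Sketch only: behaviour of the new fetch path for an adapter whose
    # use_pagecache() returns True.
    adapter.set_pagecache(shared_cache)                    # shared_cache: plain dict
    page1 = adapter._fetchUrl(story_url)                   # MISS: sleeps, fetches, caches
    page2 = adapter._fetchUrl(story_url)                   # HIT: returned from the dict
    page3 = adapter._fetchUrl(story_url, usecache=False)   # forced refetch after a state-changing POST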
@@ -210,7 +345,7 @@ class BaseSiteAdapter(Configurable):
    # Does the download the first time it's called.
    def getStory(self):
        if not self.storyDone:
-           self.getStoryMetadataOnly()
+           self.getStoryMetadataOnly(get_cover=True)

            for index, (title,url) in enumerate(self.chapterUrls):
                if (self.chapterFirst!=None and index < self.chapterFirst) or \
@@ -253,9 +388,9 @@ class BaseSiteAdapter(Configurable):

        return self.story

-   def getStoryMetadataOnly(self):
+   def getStoryMetadataOnly(self,get_cover=True):
        if not self.metadataDone:
-           self.extractChapterUrlsAndMetadata()
+           self.doExtractChapterUrlsAndMetadata(get_cover=get_cover)

            if not self.story.getMetadataRaw('dateUpdated'):
                self.story.setMetadata('dateUpdated',self.story.getMetadataRaw('datePublished'))
@@ -304,6 +439,15 @@ class BaseSiteAdapter(Configurable):
        """
        return 'no such example'

+   def doExtractChapterUrlsAndMetadata(self,get_cover=True):
+       '''
+       There are a handful of adapters that fetch a cover image while
+       collecting metadata.  That isn't needed while *just*
+       collecting metadata in FG in plugin.  Those few will override
+       this instead of extractChapterUrlsAndMetadata()
+       '''
+       return self.extractChapterUrlsAndMetadata()
+
    def extractChapterUrlsAndMetadata(self):
        "Needs to be overriden in each adapter class.  Populates self.story metadata and self.chapterUrls"
        pass
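In practice an adapter that wants the cover skipped during the plugin's foreground metadata pass overrides the new hook rather than extractChapterUrlsAndMetadata(), exactly as the ffnet adapter does earlier in this commit. A sketch of the pattern (class and variable names hypothetical):

    class SomeSiteAdapter(BaseSiteAdapter):   # hypothetical adapter
        def doExtractChapterUrlsAndMetadata(self, get_cover=True):
            data = self._fetchUrl(self.url)
            # ... populate self.story and self.chapterUrls from data ...
            if get_cover:
                # cover only when the caller actually needs it (cover_url parsed above, elided)
                self.setCoverImage(self.url, cover_url)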