Adding a fetched-file caching feature and optimizing cache hits for ffnet in particular.

Jim Miller 2014-08-31 13:48:47 -05:00
parent 5de217a0e3
commit 667c19ac3c
7 changed files with 280 additions and 77 deletions
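
In rough terms, the new feature keeps one in-memory page cache (a plain dict keyed by URL plus any request parameters) and one cookie jar, and shares them across every adapter created during a plugin run, so pages already fetched while collecting metadata in the foreground are not fetched again in the background download. A minimal sketch of the idea, with made-up names, not the plugin code itself:

    # Illustrative only: one shared dict serves as the page cache for all
    # adapters; repeat fetches of the same URL are answered from memory.
    pagecache = {}

    def cached_fetch(url, fetch, cache=pagecache):
        # fetch is whatever callable actually performs the HTTP request.
        if url in cache:
            return cache[url]      # cache HIT: no network traffic, no sleep
        data = fetch(url)          # cache MISS: go to the site
        cache[url] = data
        return data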

View file

@@ -732,6 +732,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
options['version'] = self.version
logger.debug(self.version)
options['personal.ini'] = get_ffdl_personalini()
#print("prep_downloads:%s"%books)
@@ -825,8 +826,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
skip_date_update = False
options['personal.ini'] = get_ffdl_personalini()
adapter = get_ffdl_adapter(url,fileform)
## save and share cookiejar and pagecache between all
## downloads.
if 'pagecache' not in options:
options['pagecache'] = adapter.get_empty_pagecache()
adapter.set_pagecache(options['pagecache'])
if 'cookiejar' not in options:
options['cookiejar'] = adapter.get_empty_cookiejar()
adapter.set_cookiejar(options['cookiejar'])
# reduce foreground sleep time for ffnet when few books.
if 'ffnetcount' in options and \
adapter.getConfig('tweak_fg_sleep') and \
@@ -844,7 +853,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
## or a couple tries of one or the other
for x in range(0,2):
try:
adapter.getStoryMetadataOnly()
adapter.getStoryMetadataOnly(get_cover=False)
except exceptions.FailedToLogin, f:
logger.warn("Login Failed, Need Username/Password.")
userpass = UserPassDialog(self.gui,url,f)
@@ -860,7 +869,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
adapter.is_adult=True
# let other exceptions percolate up.
story = adapter.getStoryMetadataOnly()
story = adapter.getStoryMetadataOnly(get_cover=False)
series = story.getMetadata('series')
if not merge and series and prefs['checkforseriesurlid']:
@@ -1088,7 +1097,18 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
dir=options['tdir'])
logger.debug("title:"+book['title'])
logger.debug("outfile:"+tmp.name)
book['outfile'] = tmp.name
book['outfile'] = tmp.name
# cookiejar = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
# suffix='.cookiejar',
# dir=options['tdir'])
# adapter.save_cookiejar(cookiejar.name)
# book['cookiejar'] = cookiejar.name
# pagecache = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
# suffix='.pagecache',
# dir=options['tdir'])
# adapter.save_pagecache(pagecache.name)
# book['pagecache'] = pagecache.name
return
@@ -1145,7 +1165,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
_('FFDL log'), _('FFDL download ended'), msg,
show_copy_button=False)
return
cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar',
dir=options['tdir'])
options['cookiejar'].save(cookiejarfile.name,
ignore_discard=True,
ignore_expires=True)
options['cookiejarfile']=cookiejarfile.name
del options['cookiejar'] ## can't be pickled.
func = 'arbitrary_n'
cpus = self.gui.job_manager.server.pool_size
args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_worker',
@@ -1464,7 +1492,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
elif prefs['autoconvert']:
## 'Convert Book'.auto_convert_auto_add doesn't convert if
## the format is already there.
fmt = calibre_prefs['output_format'].upper() # formmapping is upper.
fmt = calibre_prefs['output_format']
# delete if there, but not if the format we just made.
if fmt != formmapping[options['fileform']] and \
db.has_format(book_id,fmt,index_is_id=True):
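
The hunk above that writes a .cookiejar temporary file works around the fact that the shared cookie jar can't be pickled for the background job: it is saved to a file in the foreground and reloaded by filename in the worker (see load_cookiejar() further down). A rough sketch of that round trip using the standard cookielib API; the temp-file handling here is simplified and stands in for calibre's PersistentTemporaryFile:

    import cookielib
    import tempfile

    cj = cookielib.LWPCookieJar()
    # ... cookies accumulate during the foreground metadata fetches ...
    tmp = tempfile.NamedTemporaryFile(suffix='.cookiejar', delete=False)
    tmp.close()
    cj.save(tmp.name, ignore_discard=True, ignore_expires=True)

    # later, in the worker process:
    cj2 = cookielib.LWPCookieJar()
    cj2.load(tmp.name, ignore_discard=True, ignore_expires=True)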

View file

@@ -19,6 +19,11 @@ from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from calibre.constants import numeric_version as calibre_version
# for smarten punc
from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
from calibre.utils.logging import Log
from collections import namedtuple
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
@@ -58,10 +63,6 @@ def do_download_worker(book_list, options,
done=None,
args=args)
job._book = book
# job._book_id = book_id
# job._title = title
# job._modified_date = modified_date
# job._existing_isbn = existing_isbn
server.add_job(job)
else:
# was already bad before the subprocess ever started.
@@ -69,7 +70,7 @@ def do_download_worker(book_list, options,
# This server is an arbitrary_n job, so there is a notifier available.
# Set the % complete to a small number to avoid the 'unavailable' indicator
notification(0.01, 'Downloading FanFiction Stories')
notification(0.01, _('Downloading FanFiction Stories'))
# dequeue the job results as they arrive, saving the results
count = 0
@@ -81,24 +82,19 @@ def do_download_worker(book_list, options,
if not job.is_finished:
continue
# A job really finished. Get the information.
output_book = job.result
#print("output_book:%s"%output_book)
book_list.remove(job._book)
book_list.append(job.result)
book_id = job._book['calibre_id']
#title = job._title
count = count + 1
notification(float(count)/total, '%d of %d stories finished downloading'%(count,total))
# Add this job's output to the current log
logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
logger.info(job.details)
if count >= total:
logger.info("\nSuccessful:\n%s\n"%("\n".join([book['url'] for book in
logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join([book['url'] for book in
filter(lambda x: x['good'], book_list) ] ) ) )
logger.info("\nUnsuccessful:\n%s\n"%("\n".join([book['url'] for book in
logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join([book['url'] for book in
filter(lambda x: not x['good'], book_list) ] ) ) )
break
@@ -109,11 +105,10 @@ def do_download_worker(book_list, options,
def do_download_for_worker(book,options,notification=lambda x,y:x):
'''
Child job, to extract isbn from formats for this specific book,
when run as a worker job
Child job, to download story when run as a worker job
'''
try:
book['comment'] = 'Download started...'
book['comment'] = _('Download started...')
configuration = get_ffdl_config(book['url'],
options['fileform'],
@@ -122,8 +117,8 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
configuration.set("overrides","never_make_cover","true")
# images only for epub, even if the user mistakenly turned it
# on elsewhere.
# images only for epub, html, even if the user mistakenly
# turned it on elsewhere.
if options['fileform'] not in ("epub","html"):
configuration.set("overrides","include_images","false")
@@ -133,6 +128,10 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
adapter.password = book['password']
adapter.setChaptersRange(book['begin'],book['end'])
adapter.load_cookiejar(options['cookiejarfile'])
logger.debug("cookiejar:%s"%adapter.cookiejar)
adapter.set_pagecache(options['pagecache'])
story = adapter.getStoryMetadataOnly()
if 'calibre_series' in book:
adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
@@ -191,13 +190,13 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
# dup handling from ffdl_plugin needed for anthology updates.
if options['collision'] == UPDATE:
if chaptercount == urlchaptercount:
book['comment']="Already contains %d chapters. Reuse as is."%chaptercount
book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
book['outfile'] = book['epub_for_update'] # for anthology merge ops.
return book
# dup handling from ffdl_plugin needed for anthology updates.
if chaptercount > urlchaptercount:
raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png')
raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
if not (options['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \
and adapter.getConfig("do_update_hook"):
@@ -208,16 +207,12 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
writer.writeStory(outfilename=outfile, forceOverwrite=True)
book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\
book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\
(options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
if options['smarten_punctuation'] and options['fileform'] == "epub" \
and calibre_version >= (0, 9, 39):
# do smarten_punctuation from calibre's polish feature
from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
from calibre.utils.logging import Log
from collections import namedtuple
data = {'smarten_punctuation':True}
opts = ALL_OPTS.copy()
opts.update(data)

View file

@@ -59,6 +59,13 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
def use_pagecache(self):
'''
adapters that will work with the page cache need to implement
this and change it to True.
'''
return True
def extractChapterUrlsAndMetadata(self):
if self.is_adult or self.getConfig("is_adult"):

View file

@@ -52,6 +52,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# latest chapter yet and going back to chapter 1 to pull the
# chapter list doesn't get the latest. So save and use the
# original URL given to pull chapter list & metadata.
# Not used by the plugin because the URL gets normalized first
# to eliminate duplicate story urls.
self.origurl = url
if "https://m." in self.origurl:
## accept m(mobile)url, but use www.
@@ -74,14 +76,23 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
def _fetchUrl(self,url):
time.sleep(1.0) ## ffnet(and, I assume, fpcom) tends to fail
## more if hit too fast. This is in
## addition to whatever the
## slow_down_sleep_time setting is.
return BaseSiteAdapter._fetchUrl(self,url)
def _fetchUrl(self,url,parameters=None,extrasleep=1.0):
# time.sleep(1.0) ## ffnet(and, I assume, fpcom) tends to fail
# ## more if hit too fast. This is in
# ## addition to whatever the
# ## slow_down_sleep_time setting is.
return BaseSiteAdapter._fetchUrl(self,url,
parameters=parameters,
extrasleep=extrasleep)
def extractChapterUrlsAndMetadata(self):
def use_pagecache(self):
'''
adapters that will work with the page cache need to implement
this and change it to True.
'''
return True
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
# fetch the chapter. From that we will get almost all the
# metadata and chapter list
@@ -256,14 +267,15 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
else:
self.story.setMetadata('status', 'In-Progress')
# Try the larger image first.
try:
img = soup.find('img',{'class':'lazy cimage'})
self.setCoverImage(url,img['data-original'])
except:
img = soup.find('img',{'class':'cimage'})
if img:
self.setCoverImage(url,img['src'])
if get_cover:
# Try the larger image first.
try:
img = soup.find('img',{'class':'lazy cimage'})
self.setCoverImage(url,img['data-original'])
except:
img = soup.find('img',{'class':'cimage'})
if img:
self.setCoverImage(url,img['src'])
# Find the chapter selector
select = soup.find('select', { 'name' : 'chapter' } )
@@ -287,12 +299,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
return
def getChapterText(self, url):
time.sleep(4.0) ## ffnet(and, I assume, fpcom) tends to fail
## more if hit too fast. This is in
## addition to whatever the
## slow_down_sleep_time setting is.
# time.sleep(4.0) ## ffnet(and, I assume, fpcom) tends to fail
# ## more if hit too fast. This is in
# ## addition to whatever the
# ## slow_down_sleep_time setting is.
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self._fetchUrl(url,extrasleep=4.0)
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)

View file

@@ -62,6 +62,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
def use_pagecache(self):
'''
adapters that will work with the page cache need to implement
this and change it to True.
'''
return True
def needToLoginCheck(self, data):
if 'Registered Users Only' in data \
or 'There is no such account on our website' in data \
@@ -120,7 +127,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
url = self.url+'&index=1'+addurl
logger.debug("Changing URL: "+url)
self.performLogin(url)
data = self._fetchUrl(url)
data = self._fetchUrl(url,usecache=False)
if "This story contains mature content which may include violence, sexual situations, and coarse language" in data:
raise exceptions.AdultCheckRequired(self.url)

View file

@@ -65,6 +65,13 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$"
def use_pagecache(self):
'''
adapters that will work with the page cache need to implement
this and change it to True.
'''
return True
# tth won't send you future updates if you aren't 'caught up'
# on the story. Login isn't required for F21, but logging in will
# mark stories you've downloaded as 'read' on tth.
@@ -136,13 +143,16 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
if self.is_adult or self.getConfig("is_adult"):
form = soup.find('form', {'id':'sitemaxratingform'})
params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
'sitemaxrating':'5'}
logger.info("Attempting to get rating cookie for %s" % url)
data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
# refetch story page.
data = self._fetchUrl(url)
soup = bs.BeautifulSoup(data)
# if is_adult and rating isn't already set to FR21, set it so.
if not form.find('option',{'value':'5'}).get('selected'):
params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
'sitemaxrating':'5'}
logger.info("Attempting to get rating cookie for %s" % url)
data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
# refetch story page.
## XXX - needs cache invalidation? Or at least check whether this needs doing...
data = self._fetchUrl(url,usecache=False)
soup = bs.BeautifulSoup(data)
if "NOTE: This story is rated FR21 which is above your chosen filter level." in data:
raise exceptions.AdultCheckRequired(self.url)

View file

@@ -22,7 +22,9 @@ import logging
import urllib
import urllib2 as u2
import urlparse as up
import cookielib as cl
from functools import partial
import pickle
from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
@@ -70,6 +72,14 @@ class BaseSiteAdapter(Configurable):
def validateURL(self):
return re.match(self.getSiteURLPattern(), self.url)
@staticmethod
def get_empty_cookiejar():
return cl.LWPCookieJar()
@staticmethod
def get_empty_pagecache():
return {}
def __init__(self, configuration, url):
Configurable.__init__(self, configuration)
@@ -78,8 +88,9 @@ class BaseSiteAdapter(Configurable):
self.is_adult=False
self.override_sleep = None
self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
self.cookiejar = self.get_empty_cookiejar()
self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor())
# self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
## Specific UA because too many sites are blocking the default python UA.
self.opener.addheaders = [('User-agent', self.getConfig('user_agent'))]
self.storyDone = False
@@ -95,6 +106,9 @@ class BaseSiteAdapter(Configurable):
self.oldcover = None # (data of existing cover html, data of existing cover image)
self.calibrebookmark = None
self.logfile = None
self.pagecache = self.get_empty_pagecache()
## order of preference for decoding.
self.decode = ["utf8",
"Windows-1252"] # 1252 is a superset of
@@ -106,8 +120,84 @@ class BaseSiteAdapter(Configurable):
if not self.validateURL():
raise InvalidStoryURL(url,
self.getSiteDomain(),
self.getSiteExampleURLs())
self.getSiteExampleURLs())
def get_cookiejar(self):
return self.cookiejar
def set_cookiejar(self,cj):
self.cookiejar = cj
self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor())
def load_cookiejar(self,filename):
'''
Needs to be called after the adapter is created, but before any
fetches are done. Takes a file *name*.
'''
self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True)
# def save_cookiejar(self,filename):
# '''
# Assumed to be a FileCookieJar if self.cookiejar set.
# Takes file *name*.
# '''
# self.get_cookiejar().save(filename, ignore_discard=True, ignore_expires=True)
# def save_pagecache(self,filename):
# '''
# Writes pickle of pagecache to file *name*
# '''
# with open(filename, 'wb') as f:
# pickle.dump(self.get_pagecache(),
# f,protocol=pickle.HIGHEST_PROTOCOL)
# def load_pagecache(self,filename):
# '''
# Reads pickle of pagecache from file *name*
# '''
# with open(filename, 'rb') as f:
# self.set_pagecache(pickle.load(f))
def get_pagecache(self):
return self.pagecache
def set_pagecache(self,d):
self.pagecache=d
def _get_cachekey(self, url, parameters=None, headers=None):
keylist=[url]
if parameters != None:
keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items())))
if headers != None:
keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(headers.items())))
return '?'.join(keylist)
def _has_cachekey(self,cachekey):
return self.use_pagecache() and cachekey in self.get_pagecache()
def _get_from_pagecache(self,cachekey):
if self.use_pagecache():
return self.get_pagecache().get(cachekey)
else:
return None
def _set_to_pagecache(self,cachekey,data):
if self.use_pagecache():
self.get_pagecache()[cachekey] = data
def use_pagecache(self):
'''
adapters that will work with the page cache need to implement
this and change it to True.
'''
return False
# def story_load(self,filename):
# d = pickle.load(self.story.metadata,filename)
# self.story.metadata = d['metadata']
# self.chapterUrls = d['chapterlist']
# self.story.metadataDone = True
def _setURL(self,url):
self.url = url
self.parsedUrl = up.urlparse(url)
@@ -148,8 +238,25 @@ class BaseSiteAdapter(Configurable):
return "".join([x for x in data if ord(x) < 128])
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
def _postUrl(self, url, parameters={}, headers={}):
self.do_sleep()
def _postUrl(self, url,
parameters={},
headers={},
extrasleep=None,
usecache=True):
'''
When should the cache be cleared or not used? Logins...
extrasleep is primarily for the ffnet adapter, which has extra
sleeps. Passed into fetches so it can be bypassed on
cache hits.
'''
cachekey=self._get_cachekey(url, parameters, headers)
if usecache and self._has_cachekey(cachekey):
logger.info("#####################################\npagecache HIT: %s"%cachekey)
return self._get_from_pagecache(cachekey)
logger.info("#####################################\npagecache MISS: %s"%cachekey)
self.do_sleep(extrasleep)
## u2.Request assumes POST when data!=None. Also assumes data
## is application/x-www-form-urlencoded.
@@ -160,41 +267,69 @@ class BaseSiteAdapter(Configurable):
req = u2.Request(url,
data=urllib.urlencode(parameters),
headers=headers)
return self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
self._set_to_pagecache(cachekey,data)
return data
def _fetchUrlRaw(self, url, parameters=None):
def _fetchUrlRaw(self, url,
parameters=None,
extrasleep=None,
usecache=True):
'''
When should the cache be cleared or not used? Logins...
extrasleep is primarily for the ffnet adapter, which has extra
sleeps. Passed into fetches so it can be bypassed on
cache hits.
'''
cachekey=self._get_cachekey(url, parameters)
if usecache and self._has_cachekey(cachekey):
logger.info("#####################################\npagecache HIT: %s"%cachekey)
return self._get_from_pagecache(cachekey)
logger.info("#####################################\npagecache MISS: %s"%cachekey)
self.do_sleep(extrasleep)
if parameters != None:
return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read()
data = self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0))).read()
else:
return self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read()
data = self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0))).read()
self._set_to_pagecache(cachekey,data)
return data
def set_sleep(self,val):
print("\n===========\n set sleep time %s\n==========="%val)
self.override_sleep = val
def do_sleep(self):
def do_sleep(self,extrasleep=None):
if extrasleep:
time.sleep(float(extrasleep))
if self.override_sleep:
time.sleep(float(self.override_sleep))
elif self.getConfig('slow_down_sleep_time'):
time.sleep(float(self.getConfig('slow_down_sleep_time')))
# parameters is a dict()
def _fetchUrl(self, url, parameters=None):
self.do_sleep()
def _fetchUrl(self, url,
parameters=None,
usecache=True,
extrasleep=None):
excpt=None
for sleeptime in [0, 0.5, 4, 9]:
time.sleep(sleeptime)
try:
return self._decode(self._fetchUrlRaw(url,parameters))
return self._decode(self._fetchUrlRaw(url,
parameters=parameters,
usecache=usecache,
extrasleep=extrasleep))
except u2.HTTPError, he:
excpt=he
if he.code == 404:
logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(he)))
break # break out on 404
except Exception, e:
excpt=e
logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
# except Exception, e:
# excpt=e
# logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
logger.error("Giving up on %s" %url)
logger.exception(excpt)
@@ -210,7 +345,7 @@ class BaseSiteAdapter(Configurable):
# Does the download the first time it's called.
def getStory(self):
if not self.storyDone:
self.getStoryMetadataOnly()
self.getStoryMetadataOnly(get_cover=True)
for index, (title,url) in enumerate(self.chapterUrls):
if (self.chapterFirst!=None and index < self.chapterFirst) or \
@@ -253,9 +388,9 @@ class BaseSiteAdapter(Configurable):
return self.story
def getStoryMetadataOnly(self):
def getStoryMetadataOnly(self,get_cover=True):
if not self.metadataDone:
self.extractChapterUrlsAndMetadata()
self.doExtractChapterUrlsAndMetadata(get_cover=get_cover)
if not self.story.getMetadataRaw('dateUpdated'):
self.story.setMetadata('dateUpdated',self.story.getMetadataRaw('datePublished'))
@@ -304,6 +439,15 @@ class BaseSiteAdapter(Configurable):
"""
return 'no such example'
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
'''
There are a handful of adapters that fetch a cover image while
collecting metadata. That isn't needed when *just* collecting
metadata in the foreground in the plugin. Those few override
this instead of extractChapterUrlsAndMetadata().
'''
return self.extractChapterUrlsAndMetadata()
def extractChapterUrlsAndMetadata(self):
"Needs to be overriden in each adapter class. Populates self.story metadata and self.chapterUrls"
pass
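
Taken together, the plugin-side usage shown earlier amounts to roughly the following; get_ffdl_adapter() and story_urls here are stand-ins for the real plumbing, and the sketch only relies on the BaseSiteAdapter methods added in this commit:

    options = {}
    for url in story_urls:                # hypothetical list of story URLs
        adapter = get_ffdl_adapter(url, 'epub')
        if 'pagecache' not in options:
            options['pagecache'] = adapter.get_empty_pagecache()
            options['cookiejar'] = adapter.get_empty_cookiejar()
        adapter.set_pagecache(options['pagecache'])   # every adapter shares one dict...
        adapter.set_cookiejar(options['cookiejar'])   # ...and one cookie jar
        story = adapter.getStoryMetadataOnly(get_cover=False)

Adapters that leave use_pagecache() returning False simply bypass the shared dict, so sites where caching hasn't been vetted keep their old fetch behavior.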