Add update chapters by remembered URLs feature.

This commit is contained in:
Jim Miller 2015-05-28 19:34:19 -05:00
parent 8e8bb93f72
commit a11d4729bd
5 changed files with 23 additions and 6 deletions

View file

@ -190,7 +190,8 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
adapter.oldimgs,
adapter.oldcover,
adapter.calibrebookmark,
adapter.logfile) = get_update_data(book['epub_for_update'])[0:7]
adapter.logfile,
adapter.oldchaptersmap) = get_update_data(book['epub_for_update'])[0:8]
# dup handling from fff_plugin needed for anthology updates.
if options['collision'] == UPDATE:

View file

@ -104,6 +104,7 @@ class BaseSiteAdapter(Configurable):
self.chapterFirst = None
self.chapterLast = None
self.oldchapters = None
self.oldchaptersmap = None
self.oldimgs = None
self.oldcover = None # (data of existing cover html, data of existing cover image)
self.calibrebookmark = None
@ -386,11 +387,17 @@ class BaseSiteAdapter(Configurable):
removeEntities(title),
None)
else:
if self.oldchapters and index < len(self.oldchapters):
data = None
if self.oldchaptersmap:
if url in self.oldchaptersmap:
data = self.utf8FromSoup(None,
self.oldchaptersmap[url],
partial(cachedfetch,self._fetchUrlRaw,self.oldimgs))
elif self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None,
self.oldchapters[index],
partial(cachedfetch,self._fetchUrlRaw,self.oldimgs))
else:
if not data:
data = self.getChapterText(url)
self.story.addChapter(url,
removeEntities(title),

View file

@ -324,7 +324,8 @@ def do_download(arg,
adapter.oldimgs,
adapter.oldcover,
adapter.calibrebookmark,
adapter.logfile) = (get_update_data(output_filename))[0:7]
adapter.logfile,
adapter.oldchaptersmap) = (get_update_data(output_filename))[0:8]
print 'Do update - epub(%d) vs url(%d)' % (chaptercount, urlchaptercount)

View file

@ -90,6 +90,7 @@ def get_update_data(inputio,
filecount = 0
soups = [] # list of xhtml blocks
urlsoups = {} # map of xhtml blocks by url
images = {} # dict() longdesc->data
if getfilecount:
# spin through the manifest--only place there are item tags.
@ -136,6 +137,11 @@ def get_update_data(inputio,
for skip in soup.findAll(attrs={'class':'skip_on_ffdl_update'}):
skip.extract()
chapa = soup.find('a',{'class':'chapterurl'})
if chapa:
urlsoups[chapa['href']] = soup
chapa.extract()
soups.append(soup)
@ -148,7 +154,7 @@ def get_update_data(inputio,
#for k in images.keys():
#print("\tlongdesc:%s\n\tData len:%s\n"%(k,len(images[k])))
return (source,filecount,soups,images,oldcover,calibrebookmark,logfile)
return (source,filecount,soups,images,oldcover,calibrebookmark,logfile,urlsoups)
def get_path_part(n):
relpath = os.path.dirname(n)

View file

@ -654,7 +654,9 @@ div { margin: 0pt; padding: 0pt; }
if html:
logger.debug('Writing chapter text for: %s' % title)
vals={'url':url, 'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
fullhtml = CHAPTER_START.substitute(vals) + \
'<a href="'+url+'" class="chapterurl"></a>' + \
html + CHAPTER_END.substitute(vals)
# ffnet(& maybe others) gives the whole chapter text
# as one line. This causes problems for nook(at
# least) when the chapter size starts getting big