diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py
index 5b7788cf..91993a6f 100644
--- a/calibre-plugin/jobs.py
+++ b/calibre-plugin/jobs.py
@@ -190,7 +190,8 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
              adapter.oldimgs,
              adapter.oldcover,
              adapter.calibrebookmark,
-             adapter.logfile) = get_update_data(book['epub_for_update'])[0:7]
+             adapter.logfile,
+             adapter.oldchaptersmap) = get_update_data(book['epub_for_update'])[0:8]
 
             # dup handling from fff_plugin needed for anthology updates.
             if options['collision'] == UPDATE:
diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py
index 61ca68a0..c1f754ac 100644
--- a/fanficfare/adapters/base_adapter.py
+++ b/fanficfare/adapters/base_adapter.py
@@ -104,6 +104,7 @@ class BaseSiteAdapter(Configurable):
         self.chapterFirst = None
         self.chapterLast = None
         self.oldchapters = None
+        self.oldchaptersmap = None
         self.oldimgs = None
         self.oldcover = None # (data of existing cover html, data of existing cover image)
         self.calibrebookmark = None
@@ -386,11 +387,17 @@ class BaseSiteAdapter(Configurable):
                                           removeEntities(title),
                                           None)
                 else:
-                    if self.oldchapters and index < len(self.oldchapters):
+                    data = None
+                    if self.oldchaptersmap:
+                        if url in self.oldchaptersmap:
+                            data = self.utf8FromSoup(None,
+                                                     self.oldchaptersmap[url],
+                                                     partial(cachedfetch,self._fetchUrlRaw,self.oldimgs))
+                    elif self.oldchapters and index < len(self.oldchapters):
                         data = self.utf8FromSoup(None,
                                                  self.oldchapters[index],
                                                  partial(cachedfetch,self._fetchUrlRaw,self.oldimgs))
-                    else:
+                    if not data:
                         data = self.getChapterText(url)
                     self.story.addChapter(url,
                                           removeEntities(title),
diff --git a/fanficfare/cli.py b/fanficfare/cli.py
index a14f9de3..0856ce7d 100644
--- a/fanficfare/cli.py
+++ b/fanficfare/cli.py
@@ -324,7 +324,8 @@ def do_download(arg,
              adapter.oldimgs,
              adapter.oldcover,
              adapter.calibrebookmark,
-             adapter.logfile) = (get_update_data(output_filename))[0:7]
+             adapter.logfile,
+             adapter.oldchaptersmap) = (get_update_data(output_filename))[0:8]
 
             print 'Do update - epub(%d) vs url(%d)' % (chaptercount, urlchaptercount)
 
diff --git a/fanficfare/epubutils.py b/fanficfare/epubutils.py
index 0436656f..59778760 100644
--- a/fanficfare/epubutils.py
+++ b/fanficfare/epubutils.py
@@ -90,6 +90,7 @@ def get_update_data(inputio,
 
     filecount = 0
     soups = [] # list of xhmtl blocks
+    urlsoups = {} # map of xhtml blocks by url
    images = {} # dict() longdesc->data
     if getfilecount:
         # spin through the manifest--only place there are item tags.
@@ -136,6 +137,11 @@ def get_update_data(inputio,
 
         for skip in soup.findAll(attrs={'class':'skip_on_ffdl_update'}):
             skip.extract()
+
+        chapa = soup.find('a',{'class':'chapterurl'})
+        if chapa:
+            urlsoups[chapa['href']] = soup
+            chapa.extract()
 
         soups.append(soup)
 
@@ -148,7 +154,7 @@ def get_update_data(inputio,
 
     #for k in images.keys():
         #print("\tlongdesc:%s\n\tData len:%s\n"%(k,len(images[k])))
-    return (source,filecount,soups,images,oldcover,calibrebookmark,logfile)
+    return (source,filecount,soups,images,oldcover,calibrebookmark,logfile,urlsoups)
 
 def get_path_part(n):
     relpath = os.path.dirname(n)
diff --git a/fanficfare/writers/writer_epub.py b/fanficfare/writers/writer_epub.py
index abbb646b..52deae9d 100644
--- a/fanficfare/writers/writer_epub.py
+++ b/fanficfare/writers/writer_epub.py
@@ -654,7 +654,9 @@ div { margin: 0pt; padding: 0pt; }
             if html:
                 logger.debug('Writing chapter text for: %s' % title)
                 vals={'url':url, 'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
-                fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
+                fullhtml = CHAPTER_START.substitute(vals) + \
+                    '<a class="chapterurl" href="%s"></a>' % url + \
+                    html + CHAPTER_END.substitute(vals)
                 # ffnet(& maybe others) gives the whole chapter text
                 # as one line.  This causes problems for nook(at
                 # least) when the chapter size starts getting big
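
Note: a minimal standalone sketch of the chapter-reuse order this change introduces: prefer the URL-keyed oldchaptersmap (built from the <a class="chapterurl"> anchor that writer_epub.py now embeds in each chapter file), fall back to the positional oldchapters list for epubs written before this change, and only fetch from the site when neither has a usable copy. The reuse() and fetch_chapter() helpers below are hypothetical stand-ins for utf8FromSoup() and getChapterText(), not FanFicFare APIs.

# Illustration only: mirrors the lookup order added to base_adapter.py above.
# reuse() and fetch_chapter() are hypothetical stand-ins, not FanFicFare APIs.
def chapter_data(url, index, oldchaptersmap, oldchapters, reuse, fetch_chapter):
    data = None
    if oldchaptersmap:
        # Preferred: match the previously downloaded chapter by URL, so
        # chapters that were reordered or renumbered still line up.
        if url in oldchaptersmap:
            data = reuse(oldchaptersmap[url])
    elif oldchapters and index < len(oldchapters):
        # Fallback for older epubs without chapterurl anchors: match by position.
        data = reuse(oldchapters[index])
    if not data:
        # New chapter, or nothing reusable: download it.
        data = fetch_chapter(url)
    return data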