Add update chapters by remembered URLs feature.

This commit is contained in:
Jim Miller 2015-05-28 19:34:19 -05:00
parent 8e8bb93f72
commit a11d4729bd
5 changed files with 23 additions and 6 deletions

View file

@ -190,7 +190,8 @@ def do_download_for_worker(book,options,notification=lambda x,y:x):
adapter.oldimgs,
adapter.oldcover,
adapter.calibrebookmark,
adapter.logfile) = get_update_data(book['epub_for_update'])[0:7]
adapter.logfile,
adapter.oldchaptersmap) = get_update_data(book['epub_for_update'])[0:8]
# dup handling from fff_plugin needed for anthology updates.
if options['collision'] == UPDATE:

View file

@ -104,6 +104,7 @@ class BaseSiteAdapter(Configurable):
self.chapterFirst = None
self.chapterLast = None
self.oldchapters = None
self.oldchaptersmap = None
self.oldimgs = None
self.oldcover = None # (data of existing cover html, data of existing cover image)
self.calibrebookmark = None
@ -386,11 +387,17 @@ class BaseSiteAdapter(Configurable):
removeEntities(title),
None)
else:
if self.oldchapters and index < len(self.oldchapters):
data = None
if self.oldchaptersmap:
if url in self.oldchaptersmap:
data = self.utf8FromSoup(None,
self.oldchaptersmap[url],
partial(cachedfetch,self._fetchUrlRaw,self.oldimgs))
elif self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None,
self.oldchapters[index],
partial(cachedfetch,self._fetchUrlRaw,self.oldimgs))
else:
if not data:
data = self.getChapterText(url)
self.story.addChapter(url,
removeEntities(title),

View file

@ -324,7 +324,8 @@ def do_download(arg,
adapter.oldimgs,
adapter.oldcover,
adapter.calibrebookmark,
adapter.logfile) = (get_update_data(output_filename))[0:7]
adapter.logfile,
adapter.oldchaptersmap) = (get_update_data(output_filename))[0:8]
print 'Do update - epub(%d) vs url(%d)' % (chaptercount, urlchaptercount)

View file

@ -90,6 +90,7 @@ def get_update_data(inputio,
filecount = 0
soups = [] # list of xhtml blocks
urlsoups = {} # map of xhtml blocks by url
images = {} # dict() longdesc->data
if getfilecount:
# spin through the manifest--only place there are item tags.
@ -136,6 +137,11 @@ def get_update_data(inputio,
for skip in soup.findAll(attrs={'class':'skip_on_ffdl_update'}):
skip.extract()
chapa = soup.find('a',{'class':'chapterurl'})
if chapa:
urlsoups[chapa['href']] = soup
chapa.extract()
soups.append(soup)
@ -148,7 +154,7 @@ def get_update_data(inputio,
#for k in images.keys():
#print("\tlongdesc:%s\n\tData len:%s\n"%(k,len(images[k])))
return (source,filecount,soups,images,oldcover,calibrebookmark,logfile)
return (source,filecount,soups,images,oldcover,calibrebookmark,logfile,urlsoups)
def get_path_part(n):
relpath = os.path.dirname(n)

View file

@ -654,7 +654,9 @@ div { margin: 0pt; padding: 0pt; }
if html:
logger.debug('Writing chapter text for: %s' % title)
vals={'url':url, 'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
fullhtml = CHAPTER_START.substitute(vals) + \
'<a href="'+url+'" class="chapterurl"></a>' + \
html + CHAPTER_END.substitute(vals)
# ffnet(& maybe others) gives the whole chapter text
# as one line. This causes problems for nook(at
# least) when the chapter size starts getting big