Add include_images support for html format.

Pass class & id attributes on HTML tags. In the previous revision (rev:696): fix for the epub cover when cover_exclusion_regexp skips the first img. Don't add a chapter number when there is only one chapter.
2026-05-08 12:36:11 +02:00 · 2012-10-14 11:23:20 -05:00 · 2012-10-14 11:23:20 -05:00 · 4d96632b67
commit 4d96632b67
parent 60b7eae72e
8 changed files with 122 additions and 35 deletions
--- a/calibre-plugin/jobs.py
+++ b/calibre-plugin/jobs.py
@ -123,7 +123,7 @@ def do_download_for_worker(book,options):

        # images only for epub and html, even if the user mistakenly
        # turned it on elsewhere.
-        if options['fileform'] != "epub":
+        if options['fileform'] not in ("epub","html"):
            configuration.set("overrides","include_images","false")
        
        adapter = adapters.getAdapter(configuration,book['url'])
--- a/defaults.ini
+++ b/defaults.ini
@ -192,6 +192,7 @@ keep_summary_html:true
 strip_chapter_numbers:false

 ## add_chapter_numbers can be true, false or toconly
+## (Note: the number is not added when there's only one chapter.)
 add_chapter_numbers:false

 ## (Two versions of chapter_title_strip_pattern are shown below.  You
@ -222,6 +223,22 @@ chapter_title_add_pattern:${index}. ${title}
 ## Each output format has a section that overrides [defaults]
 [html]

+## include images from img tags in the body and summary of
+## stories.  Images will be converted to jpg for size if possible.
+## include_images is *only* available in epub and html output formats.
+## include_images is *not* available in the web service in any format.
+#include_images:false
+
+## Note that it's *highly* recommended to use zip file output or
+## story-unique destination directories to avoid overwriting images.
+#output_filename: books/${author}/${title}/${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
+#zip_output: false
+
+## This switch prevents FFDL from doing any processing on the images.
+## Usually they would be converted to jpg, resized and optionally made
+## grayscale.
+no_image_processing: true
+
 ## output background color--only used by html and epub (and ignored in
 ## epub by many readers).  Included below in output_css--will be
 ## ignored if not in output_css.
@ -324,7 +341,7 @@ output_css:

 ## include images from img tags in the body and summary of
 ## stories.  Images will be converted to jpg for size if possible.
-## include_images is *only* available in epub output format.
+## include_images is *only* available in epub and html output formats.
 ## include_images is *not* available in the web service in any format.
 #include_images:false

--- a/downloader.py
+++ b/downloader.py
@ -138,7 +138,7 @@ def main():

   # images only for epub and html, even if the user mistakenly
   # turned it on elsewhere.
-   if options.format != "epub":
+   if options.format not in ("epub","html"):
       configuration.set("overrides","include_images","false")
       
   if options.options:
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@ -304,7 +304,7 @@ class BaseSiteAdapter(Configurable):
        if not fetch:
            fetch=self._fetchUrlRaw

-        acceptable_attributes = ['href','name']
+        acceptable_attributes = ['href','name','class','id']
        #print("include_images:"+self.getConfig('include_images'))
        if self.getConfig('include_images'):
            acceptable_attributes.extend(('src','alt','longdesc'))
--- a/fanficdownloader/story.py
+++ b/fanficdownloader/story.py
@ -20,6 +20,8 @@ import urlparse
 import string
 from math import floor
 from functools import partial
+import logging
+import urlparse as up

 import exceptions
 from htmlcleanup import conditionalRemoveEntities, removeAllEntities
@ -52,7 +54,7 @@ try:
        if export:
            return (img.export('JPG'),'jpg','image/jpeg')
        else:
-            print("image used unchanged")
+            logging.debug("image used unchanged")
            return (data,'jpg','image/jpeg')
        
 except:
@ -88,23 +90,34 @@ except:
                img.save(outsio,'JPEG')
                return (outsio.getvalue(),'jpg','image/jpeg')
            else:
-                print("image used unchanged")
+                logging.debug("image used unchanged")
                return (data,'jpg','image/jpeg')
        
    except:
-
        # No calibre or PIL, simple pass through with mimetype.
-        imagetypes = {
-            'jpg':'image/jpeg',
-            'jpeg':'image/jpeg',
-            'png':'image/png',
-            'gif':'image/gif',
-            'svg':'image/svg+xml',
-            }
-
        def convert_image(url,data,sizes,grayscale):
-            ext=url[url.rfind('.')+1:].lower()
-            return (data,ext,imagetypes[ext])
+            return no_convert_image(url,data)
+        
+imagetypes = {
+    'jpg':'image/jpeg',
+    'jpeg':'image/jpeg',
+    'png':'image/png',
+    'gif':'image/gif',
+    'svg':'image/svg+xml',
+    }
+
+## Pass-through: returns the image data unchanged.  Also used when
+## no_image_processing is set.
+def no_convert_image(url,data):
+    parsedUrl = up.urlparse(url)
+    
+    ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower()
+    
+    if ext not in imagetypes:
+        logging.debug("no_convert_image url:%s - no known extension"%url)
+        # doesn't have extension? use jpg.
+        ext='jpg'
+        
+    return (data,ext,imagetypes[ext])
        
 def normalize_format_name(fmt):
    if fmt:
@ -483,17 +496,22 @@ class Story(Configurable):
        prefix='ffdl'
        if imgurl not in self.imgurls:
            parsedUrl = urlparse.urlparse(imgurl)
+
            try:
-                sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
+                if self.getConfig('no_image_processing'):
+                    (data,ext,mime) = no_convert_image(imgurl,
+                                                   fetch(imgurl))
+                else:
+                    try:
+                        sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
+                    except Exception, e:
+                        raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
+                    (data,ext,mime) = convert_image(imgurl,
+                                                    fetch(imgurl),
+                                                    sizes,
+                                                    self.getConfig('grayscale_images'))
            except Exception, e:
-                raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
-            try:
-                (data,ext,mime) = convert_image(imgurl,
-                                                fetch(imgurl),
-                                                sizes,
-                                                self.getConfig('grayscale_images'))
-            except Exception, e:
-                print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
+                logging.info("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
                return "failedtoload"
            
            # explicit cover, make the first image.
@ -528,7 +546,7 @@ class Story(Configurable):
                    ext)
                self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
                
-            print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
+            logging.debug("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
        else:
            newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc']
            
--- a/fanficdownloader/writers/base_writer.py
+++ b/fanficdownloader/writers/base_writer.py
@ -190,6 +190,8 @@ class BaseStoryWriter(Configurable):
        if outfilename == None:
            outfilename=self.getOutputFileName()

+        self.outfilename = outfilename
+
        # minor cheat, tucking css into metadata.
        if self.getConfig("output_css"):
            self.story.setMetadata("output_css",
@ -203,8 +205,8 @@ class BaseStoryWriter(Configurable):
            logging.info("Save directly to file: %s" % outfilename)
            if self.getConfig('make_directories'):
                path=""
-                dirs = os.path.dirname(outfilename).split('/')
-                for dir in dirs:
+                outputdirs = os.path.dirname(outfilename).split('/')
+                for dir in outputdirs:
                    path+=dir+"/"
                    if not os.path.exists(path):
                        os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2?
@ -238,14 +240,14 @@ class BaseStoryWriter(Configurable):
                                                 # fetch once.
        if self.getConfig('zip_output'):
            out = StringIO.StringIO()
+            self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
            self.writeStoryImpl(out)
-            zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
-            zipout.writestr(self.getBaseFileName(),out.getvalue())
+            self.zipout.writestr(self.getBaseFileName(),out.getvalue())
            # declares all the files created by Windows.  otherwise, when
            # it runs in appengine, windows unzips the files as 000 perms.
-            for zf in zipout.filelist:
+            for zf in self.zipout.filelist:
                zf.create_system = 0
-            zipout.close()
+            self.zipout.close()
            out.close()
        else:
            self.writeStoryImpl(outstream)
@ -253,6 +255,27 @@ class BaseStoryWriter(Configurable):
        if close:
            outstream.close()

+    def writeFile(self, filename, data):
+        logging.debug("writeFile:%s"%filename)
+        
+        if self.getConfig('zip_output'):
+            outputdirs = os.path.dirname(self.getBaseFileName())
+            if outputdirs:
+                filename=outputdirs+'/'+filename
+            self.zipout.writestr(filename,data)
+        else:
+            outputdirs = os.path.dirname(self.outfilename)
+            if outputdirs:
+                filename=outputdirs+'/'+filename
+
+            dir = os.path.dirname(filename)
+            if not os.path.exists(dir):
+                os.mkdir(dir) ## os.makedirs() doesn't work in 2.5.2?
+                    
+            outstream = open(filename,"wb")
+            outstream.write(data)
+            outstream.close()
+
    def writeStoryImpl(self, out):
        "Must be overriden by sub classes."
        pass
--- a/fanficdownloader/writers/writer_html.py
+++ b/fanficdownloader/writers/writer_html.py
@ -46,6 +46,10 @@ ${output_css}
 <h1><a href="${storyUrl}">${title}</a> by ${authorHTML}</h1>
 ''')

+        self.HTML_COVER = string.Template('''
+<img src="${coverimg}" alt="cover" />
+''')
+        
        self.HTML_TITLE_PAGE_START = string.Template('''
 <table class="full">
 ''')
@ -84,11 +88,16 @@ ${output_css}

    def writeStoryImpl(self, out):

+        if self.hasConfig("cover_content"):
+            COVER = string.Template(self.getConfig("cover_content"))
+        else:
+            COVER = self.HTML_COVER
+
        if self.hasConfig('file_start'):
            FILE_START = string.Template(self.getConfig("file_start"))
        else:
            FILE_START = self.HTML_FILE_START
-        
+
        if self.hasConfig('file_end'):
            FILE_END = string.Template(self.getConfig("file_end"))
        else:
@ -96,6 +105,9 @@ ${output_css}
        
        self._write(out,FILE_START.substitute(self.story.getAllMetadata()))

+        if self.getConfig('include_images') and self.story.cover:
+            self._write(out,COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
+            
        self.writeTitlePage(out,
                            self.HTML_TITLE_PAGE_START,
                            self.HTML_TITLE_ENTRY,
@ -125,3 +137,8 @@ ${output_css}
                self._write(out,CHAPTER_END.substitute(vals))

        self._write(out,FILE_END.substitute(self.story.getAllMetadata()))
+
+        if self.getConfig('include_images'):
+            for imgmap in self.story.getImgUrls():
+                self.writeFile(imgmap['newsrc'],imgmap['data'])
+        
--- a/plugin-defaults.ini
+++ b/plugin-defaults.ini
@ -176,6 +176,7 @@ keep_summary_html:true
 strip_chapter_numbers:false

 ## add_chapter_numbers can be true, false or toconly
+## (Note: the number is not added when there's only one chapter.)
 add_chapter_numbers:false

 ## (Two versions of chapter_title_strip_pattern are shown below.  You
@ -206,6 +207,17 @@ chapter_title_add_pattern:${index}. ${title}
 ## Each output format has a section that overrides [defaults]
 [html]

+## include images from img tags in the body and summary of
+## stories.  Images will be converted to jpg for size if possible.
+## include_images is *only* available in epub and html output formats.
+## include_images is *not* available in the web service in any format.
+#include_images:false
+
+## This switch prevents FFDL from doing any processing on the images.
+## Usually they would be converted to jpg, resized and optionally made
+## grayscale.
+no_image_processing: true
+
 ## output background color--only used by html and epub (and ignored in
 ## epub by many readers).  Included below in output_css--will be
 ## ignored if not in output_css.
@ -305,7 +317,7 @@ output_css:

 ## include images from img tags in the body and summary of
 ## stories.  Images will be converted to jpg for size if possible.
-## include_images is *only* available in epub output format.
+## include_images is *only* available in epub and html output formats.
 #include_images:false

 ## If set, the first image found will be made the cover image.  If