diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index 2fad2e10..eced670b 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -123,7 +123,7 @@ def do_download_for_worker(book,options): # images only for epub, even if the user mistakenly turned it # on else where. - if options['fileform'] != "epub": + if options['fileform'] not in ("epub","html"): configuration.set("overrides","include_images","false") adapter = adapters.getAdapter(configuration,book['url']) diff --git a/defaults.ini b/defaults.ini index c00a6007..69fd76d1 100644 --- a/defaults.ini +++ b/defaults.ini @@ -192,6 +192,7 @@ keep_summary_html:true strip_chapter_numbers:false ## add_chapter_numbers can be true, false or toconly +## (Note number is not added when there's only one chapter.) add_chapter_numbers:false ## (Two versions of chapter_title_strip_pattern are shown below. You @@ -222,6 +223,22 @@ chapter_title_add_pattern:${index}. ${title} ## Each output format has a section that overrides [defaults] [html] +## include images from img tags in the body and summary of +## stories. Images will be converted to jpg for size if possible. +## include_images is *only* available in epub and html output formats. +## include_images is *not* available in the web service in any format. +#include_images:false + +## Note that it's *highly* recommended to use zipfile output or story +## unique destination directories to avoid overwriting images. +#output_filename: books/${author}/${title}/${title}-${siteabbrev}_${authorId}_${storyId}${formatext} +#zip_output: false + +## This switch prevents FFDL from doing any processing on the images. +## Usually they would be converted to jpg, resized and optionally made +## grayscale. +no_image_processing: true + ## output background color--only used by html and epub (and ignored in ## epub by many readers). Included below in output_css--will be ## ignored if not in output_css. @@ -324,7 +341,7 @@ output_css: ## include images from img tags in the body and summary of ## stories. Images will be converted to jpg for size if possible. -## include_images is *only* available in epub output format. +## include_images is *only* available in epub and html output format. ## include_images is *not* available in the web service in any format. #include_images:false diff --git a/downloader.py b/downloader.py index c809d191..2d01c629 100644 --- a/downloader.py +++ b/downloader.py @@ -138,7 +138,7 @@ def main(): # images only for epub, even if the user mistakenly turned it # on else where. - if options.format != "epub": + if options.format not in ("epub","html"): configuration.set("overrides","include_images","false") if options.options: diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 7ca9686f..039bdee2 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -304,7 +304,7 @@ class BaseSiteAdapter(Configurable): if not fetch: fetch=self._fetchUrlRaw - acceptable_attributes = ['href','name'] + acceptable_attributes = ['href','name','class','id'] #print("include_images:"+self.getConfig('include_images')) if self.getConfig('include_images'): acceptable_attributes.extend(('src','alt','longdesc')) diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index b6897d2f..a5731f34 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -20,6 +20,8 @@ import urlparse import string from math import floor from functools import partial +import logging +import urlparse as up import exceptions from htmlcleanup import conditionalRemoveEntities, removeAllEntities @@ -52,7 +54,7 @@ try: if export: return (img.export('JPG'),'jpg','image/jpeg') else: - print("image used unchanged") + logging.debug("image used unchanged") return (data,'jpg','image/jpeg') except: @@ -88,23 +90,34 @@ except: img.save(outsio,'JPEG') return (outsio.getvalue(),'jpg','image/jpeg') else: - print("image used unchanged") + logging.debug("image used unchanged") return (data,'jpg','image/jpeg') except: - # No calibre or PIL, simple pass through with mimetype. - imagetypes = { - 'jpg':'image/jpeg', - 'jpeg':'image/jpeg', - 'png':'image/png', - 'gif':'image/gif', - 'svg':'image/svg+xml', - } - def convert_image(url,data,sizes,grayscale): - ext=url[url.rfind('.')+1:].lower() - return (data,ext,imagetypes[ext]) + return no_convert_image(url,data) + +imagetypes = { + 'jpg':'image/jpeg', + 'jpeg':'image/jpeg', + 'png':'image/png', + 'gif':'image/gif', + 'svg':'image/svg+xml', + } + +## also used for explicit no image processing. +def no_convert_image(url,data): + parsedUrl = up.urlparse(url) + + ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower() + + if ext not in imagetypes: + logging.debug("no_convert_image url:%s - no known extension"%url) + # doesn't have extension? use jpg. + ext='jpg' + + return (data,ext,imagetypes[ext]) def normalize_format_name(fmt): if fmt: @@ -483,17 +496,22 @@ class Story(Configurable): prefix='ffdl' if imgurl not in self.imgurls: parsedUrl = urlparse.urlparse(imgurl) + try: - sizes = [ int(x) for x in self.getConfigList('image_max_size') ] + if self.getConfig('no_image_processing'): + (data,ext,mime) = no_convert_image(imgurl, + fetch(imgurl)) + else: + try: + sizes = [ int(x) for x in self.getConfigList('image_max_size') ] + except Exception, e: + raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e)) + (data,ext,mime) = convert_image(imgurl, + fetch(imgurl), + sizes, + self.getConfig('grayscale_images')) except Exception, e: - raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e)) - try: - (data,ext,mime) = convert_image(imgurl, - fetch(imgurl), - sizes, - self.getConfig('grayscale_images')) - except Exception, e: - print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e)) + logging.info("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e)) return "failedtoload" # explicit cover, make the first image. @@ -528,7 +546,7 @@ class Story(Configurable): ext) self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data}) - print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data))) + logging.debug("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data))) else: newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc'] diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index 64f8b70e..a1999fca 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -190,6 +190,8 @@ class BaseStoryWriter(Configurable): if outfilename == None: outfilename=self.getOutputFileName() + self.outfilename = outfilename + # minor cheat, tucking css into metadata. if self.getConfig("output_css"): self.story.setMetadata("output_css", @@ -203,8 +205,8 @@ class BaseStoryWriter(Configurable): logging.info("Save directly to file: %s" % outfilename) if self.getConfig('make_directories'): path="" - dirs = os.path.dirname(outfilename).split('/') - for dir in dirs: + outputdirs = os.path.dirname(outfilename).split('/') + for dir in outputdirs: path+=dir+"/" if not os.path.exists(path): os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2? @@ -238,14 +240,14 @@ class BaseStoryWriter(Configurable): # fetch once. if self.getConfig('zip_output'): out = StringIO.StringIO() + self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) self.writeStoryImpl(out) - zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) - zipout.writestr(self.getBaseFileName(),out.getvalue()) + self.zipout.writestr(self.getBaseFileName(),out.getvalue()) # declares all the files created by Windows. otherwise, when # it runs in appengine, windows unzips the files as 000 perms. - for zf in zipout.filelist: + for zf in self.zipout.filelist: zf.create_system = 0 - zipout.close() + self.zipout.close() out.close() else: self.writeStoryImpl(outstream) @@ -253,6 +255,27 @@ class BaseStoryWriter(Configurable): if close: outstream.close() + def writeFile(self, filename, data): + logging.debug("writeFile:%s"%filename) + + if self.getConfig('zip_output'): + outputdirs = os.path.dirname(self.getBaseFileName()) + if outputdirs: + filename=outputdirs+'/'+filename + self.zipout.writestr(filename,data) + else: + outputdirs = os.path.dirname(self.outfilename) + if outputdirs: + filename=outputdirs+'/'+filename + + dir = os.path.dirname(filename) + if not os.path.exists(dir): + os.mkdir(dir) ## os.makedirs() doesn't work in 2.5.2? + + outstream = open(filename,"wb") + outstream.write(data) + outstream.close() + def writeStoryImpl(self, out): "Must be overriden by sub classes." pass diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py index 194a02f2..0224dfcc 100644 --- a/fanficdownloader/writers/writer_html.py +++ b/fanficdownloader/writers/writer_html.py @@ -46,6 +46,10 @@ ${output_css}