mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-08 12:36:11 +02:00
Add include_images support for html format.
Pass class & id attributes on html tags. In Previous (rev:696) - Fix for epub cover when cover_exclusion_regexp skips 1st img. Don't add chapter number when only one chapter.
This commit is contained in:
parent
60b7eae72e
commit
4d96632b67
8 changed files with 122 additions and 35 deletions
|
|
@ -123,7 +123,7 @@ def do_download_for_worker(book,options):
|
|||
|
||||
# images only for epub and html, even if the user mistakenly turned it
|
||||
# on elsewhere.
|
||||
if options['fileform'] != "epub":
|
||||
if options['fileform'] not in ("epub","html"):
|
||||
configuration.set("overrides","include_images","false")
|
||||
|
||||
adapter = adapters.getAdapter(configuration,book['url'])
|
||||
|
|
|
|||
19
defaults.ini
19
defaults.ini
|
|
@ -192,6 +192,7 @@ keep_summary_html:true
|
|||
strip_chapter_numbers:false
|
||||
|
||||
## add_chapter_numbers can be true, false or toconly
|
||||
## (Note number is not added when there's only one chapter.)
|
||||
add_chapter_numbers:false
|
||||
|
||||
## (Two versions of chapter_title_strip_pattern are shown below. You
|
||||
|
|
@ -222,6 +223,22 @@ chapter_title_add_pattern:${index}. ${title}
|
|||
## Each output format has a section that overrides [defaults]
|
||||
[html]
|
||||
|
||||
## include images from img tags in the body and summary of
|
||||
## stories. Images will be converted to jpg for size if possible.
|
||||
## include_images is *only* available in epub and html output formats.
|
||||
## include_images is *not* available in the web service in any format.
|
||||
#include_images:false
|
||||
|
||||
## Note that it's *highly* recommended to use zipfile output or story
|
||||
## unique destination directories to avoid overwriting images.
|
||||
#output_filename: books/${author}/${title}/${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
|
||||
#zip_output: false
|
||||
|
||||
## This switch prevents FFDL from doing any processing on the images.
|
||||
## Usually they would be converted to jpg, resized and optionally made
|
||||
## grayscale.
|
||||
no_image_processing: true
|
||||
|
||||
## output background color--only used by html and epub (and ignored in
|
||||
## epub by many readers). Included below in output_css--will be
|
||||
## ignored if not in output_css.
|
||||
|
|
@ -324,7 +341,7 @@ output_css:
|
|||
|
||||
## include images from img tags in the body and summary of
|
||||
## stories. Images will be converted to jpg for size if possible.
|
||||
## include_images is *only* available in epub output format.
|
||||
## include_images is *only* available in epub and html output formats.
|
||||
## include_images is *not* available in the web service in any format.
|
||||
#include_images:false
|
||||
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ def main():
|
|||
|
||||
# images only for epub and html, even if the user mistakenly turned it
|
||||
# on elsewhere.
|
||||
if options.format != "epub":
|
||||
if options.format not in ("epub","html"):
|
||||
configuration.set("overrides","include_images","false")
|
||||
|
||||
if options.options:
|
||||
|
|
|
|||
|
|
@ -304,7 +304,7 @@ class BaseSiteAdapter(Configurable):
|
|||
if not fetch:
|
||||
fetch=self._fetchUrlRaw
|
||||
|
||||
acceptable_attributes = ['href','name']
|
||||
acceptable_attributes = ['href','name','class','id']
|
||||
#print("include_images:"+self.getConfig('include_images'))
|
||||
if self.getConfig('include_images'):
|
||||
acceptable_attributes.extend(('src','alt','longdesc'))
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ import urlparse
|
|||
import string
|
||||
from math import floor
|
||||
from functools import partial
|
||||
import logging
|
||||
import urlparse as up
|
||||
|
||||
import exceptions
|
||||
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
|
||||
|
|
@ -52,7 +54,7 @@ try:
|
|||
if export:
|
||||
return (img.export('JPG'),'jpg','image/jpeg')
|
||||
else:
|
||||
print("image used unchanged")
|
||||
logging.debug("image used unchanged")
|
||||
return (data,'jpg','image/jpeg')
|
||||
|
||||
except:
|
||||
|
|
@ -88,23 +90,34 @@ except:
|
|||
img.save(outsio,'JPEG')
|
||||
return (outsio.getvalue(),'jpg','image/jpeg')
|
||||
else:
|
||||
print("image used unchanged")
|
||||
logging.debug("image used unchanged")
|
||||
return (data,'jpg','image/jpeg')
|
||||
|
||||
except:
|
||||
|
||||
# No calibre or PIL, simple pass through with mimetype.
|
||||
imagetypes = {
|
||||
'jpg':'image/jpeg',
|
||||
'jpeg':'image/jpeg',
|
||||
'png':'image/png',
|
||||
'gif':'image/gif',
|
||||
'svg':'image/svg+xml',
|
||||
}
|
||||
|
||||
def convert_image(url,data,sizes,grayscale):
|
||||
ext=url[url.rfind('.')+1:].lower()
|
||||
return (data,ext,imagetypes[ext])
|
||||
return no_convert_image(url,data)
|
||||
|
||||
# Mime types for the image file extensions that are passed through
# without conversion.
imagetypes = {
    'jpg':'image/jpeg',
    'jpeg':'image/jpeg',
    'png':'image/png',
    'gif':'image/gif',
    'svg':'image/svg+xml',
    }

## also used for explicit no image processing.
def no_convert_image(url,data):
    """Return the image data untouched, with an extension and mime type.

    url  -- URL the image was fetched from; only its path component is
            examined to find the file extension.
    data -- raw image content, returned as-is.

    Returns a (data, ext, mimetype) tuple.  When the path's extension
    is not a key of imagetypes, 'jpg' / 'image/jpeg' is used.
    """
    parsedUrl = up.urlparse(url)

    # Look at the path only, so a query string or fragment containing
    # a '.' cannot be mistaken for the extension.
    ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower()

    if ext not in imagetypes:
        # Let logging do the formatting so it only happens when debug
        # output is actually enabled.
        logging.debug("no_convert_image url:%s - no known extension",url)
        # doesn't have extension? use jpg.
        ext='jpg'

    return (data,ext,imagetypes[ext])
|
||||
|
||||
def normalize_format_name(fmt):
|
||||
if fmt:
|
||||
|
|
@ -483,17 +496,22 @@ class Story(Configurable):
|
|||
prefix='ffdl'
|
||||
if imgurl not in self.imgurls:
|
||||
parsedUrl = urlparse.urlparse(imgurl)
|
||||
|
||||
try:
|
||||
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
|
||||
if self.getConfig('no_image_processing'):
|
||||
(data,ext,mime) = no_convert_image(imgurl,
|
||||
fetch(imgurl))
|
||||
else:
|
||||
try:
|
||||
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
|
||||
except Exception, e:
|
||||
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
|
||||
(data,ext,mime) = convert_image(imgurl,
|
||||
fetch(imgurl),
|
||||
sizes,
|
||||
self.getConfig('grayscale_images'))
|
||||
except Exception, e:
|
||||
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
|
||||
try:
|
||||
(data,ext,mime) = convert_image(imgurl,
|
||||
fetch(imgurl),
|
||||
sizes,
|
||||
self.getConfig('grayscale_images'))
|
||||
except Exception, e:
|
||||
print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
|
||||
logging.info("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
|
||||
return "failedtoload"
|
||||
|
||||
# explicit cover, make the first image.
|
||||
|
|
@ -528,7 +546,7 @@ class Story(Configurable):
|
|||
ext)
|
||||
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
|
||||
|
||||
print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
|
||||
logging.debug("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
|
||||
else:
|
||||
newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc']
|
||||
|
||||
|
|
|
|||
|
|
@ -190,6 +190,8 @@ class BaseStoryWriter(Configurable):
|
|||
if outfilename == None:
|
||||
outfilename=self.getOutputFileName()
|
||||
|
||||
self.outfilename = outfilename
|
||||
|
||||
# minor cheat, tucking css into metadata.
|
||||
if self.getConfig("output_css"):
|
||||
self.story.setMetadata("output_css",
|
||||
|
|
@ -203,8 +205,8 @@ class BaseStoryWriter(Configurable):
|
|||
logging.info("Save directly to file: %s" % outfilename)
|
||||
if self.getConfig('make_directories'):
|
||||
path=""
|
||||
dirs = os.path.dirname(outfilename).split('/')
|
||||
for dir in dirs:
|
||||
outputdirs = os.path.dirname(outfilename).split('/')
|
||||
for dir in outputdirs:
|
||||
path+=dir+"/"
|
||||
if not os.path.exists(path):
|
||||
os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2?
|
||||
|
|
@ -238,14 +240,14 @@ class BaseStoryWriter(Configurable):
|
|||
# fetch once.
|
||||
if self.getConfig('zip_output'):
|
||||
out = StringIO.StringIO()
|
||||
self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
|
||||
self.writeStoryImpl(out)
|
||||
zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
|
||||
zipout.writestr(self.getBaseFileName(),out.getvalue())
|
||||
self.zipout.writestr(self.getBaseFileName(),out.getvalue())
|
||||
# declares all the files created by Windows. otherwise, when
|
||||
# it runs in appengine, windows unzips the files as 000 perms.
|
||||
for zf in zipout.filelist:
|
||||
for zf in self.zipout.filelist:
|
||||
zf.create_system = 0
|
||||
zipout.close()
|
||||
self.zipout.close()
|
||||
out.close()
|
||||
else:
|
||||
self.writeStoryImpl(outstream)
|
||||
|
|
@ -253,6 +255,27 @@ class BaseStoryWriter(Configurable):
|
|||
if close:
|
||||
outstream.close()
|
||||
|
||||
def writeFile(self, filename, data):
    """Write an extra output file (such as an image) alongside the story.

    filename -- path of the file, relative to the story's location.
    data     -- content to write, unchanged.

    When the zip_output setting is on, the data is added to
    self.zipout under the directory part of getBaseFileName().
    Otherwise it is written to disk under the directory part of
    self.outfilename, creating any missing directories first.
    """
    logging.debug("writeFile:%s", filename)

    if self.getConfig('zip_output'):
        outputdirs = os.path.dirname(self.getBaseFileName())
        if outputdirs:
            filename=outputdirs+'/'+filename
        self.zipout.writestr(filename,data)
    else:
        outputdirs = os.path.dirname(self.outfilename)
        if outputdirs:
            filename=outputdirs+'/'+filename

        # Collect every missing directory, deepest first.  An empty
        # directory part (file goes in the current directory) needs
        # nothing created; os.mkdir('') would raise.
        missing = []
        parent = os.path.dirname(filename)
        while parent and not os.path.exists(parent):
            missing.append(parent)
            upper = os.path.dirname(parent)
            if upper == parent:
                # Reached a root that can't be split any further.
                break
            parent = upper

        # Create them outermost first, one level at a time.
        for newdir in reversed(missing):
            os.mkdir(newdir) ## os.makedirs() doesn't work in 2.5.2?

        outstream = open(filename,"wb")
        try:
            outstream.write(data)
        finally:
            # Always release the handle, even if the write fails.
            outstream.close()
|
||||
|
||||
def writeStoryImpl(self, out):
|
||||
"Must be overridden by subclasses."
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -46,6 +46,10 @@ ${output_css}
|
|||
<h1><a href="${storyUrl}">${title}</a> by ${authorHTML}</h1>
|
||||
''')
|
||||
|
||||
self.HTML_COVER = string.Template('''
|
||||
<img src="${coverimg}" alt="cover" />
|
||||
''')
|
||||
|
||||
self.HTML_TITLE_PAGE_START = string.Template('''
|
||||
<table class="full">
|
||||
''')
|
||||
|
|
@ -84,11 +88,16 @@ ${output_css}
|
|||
|
||||
def writeStoryImpl(self, out):
|
||||
|
||||
if self.hasConfig("cover_content"):
|
||||
COVER = string.Template(self.getConfig("cover_content"))
|
||||
else:
|
||||
COVER = self.HTML_COVER
|
||||
|
||||
if self.hasConfig('file_start'):
|
||||
FILE_START = string.Template(self.getConfig("file_start"))
|
||||
else:
|
||||
FILE_START = self.HTML_FILE_START
|
||||
|
||||
|
||||
if self.hasConfig('file_end'):
|
||||
FILE_END = string.Template(self.getConfig("file_end"))
|
||||
else:
|
||||
|
|
@ -96,6 +105,9 @@ ${output_css}
|
|||
|
||||
self._write(out,FILE_START.substitute(self.story.getAllMetadata()))
|
||||
|
||||
if self.getConfig('include_images') and self.story.cover:
|
||||
self._write(out,COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
|
||||
|
||||
self.writeTitlePage(out,
|
||||
self.HTML_TITLE_PAGE_START,
|
||||
self.HTML_TITLE_ENTRY,
|
||||
|
|
@ -125,3 +137,8 @@ ${output_css}
|
|||
self._write(out,CHAPTER_END.substitute(vals))
|
||||
|
||||
self._write(out,FILE_END.substitute(self.story.getAllMetadata()))
|
||||
|
||||
if self.getConfig('include_images'):
|
||||
for imgmap in self.story.getImgUrls():
|
||||
self.writeFile(imgmap['newsrc'],imgmap['data'])
|
||||
|
||||
|
|
|
|||
|
|
@ -176,6 +176,7 @@ keep_summary_html:true
|
|||
strip_chapter_numbers:false
|
||||
|
||||
## add_chapter_numbers can be true, false or toconly
|
||||
## (Note number is not added when there's only one chapter.)
|
||||
add_chapter_numbers:false
|
||||
|
||||
## (Two versions of chapter_title_strip_pattern are shown below. You
|
||||
|
|
@ -206,6 +207,17 @@ chapter_title_add_pattern:${index}. ${title}
|
|||
## Each output format has a section that overrides [defaults]
|
||||
[html]
|
||||
|
||||
## include images from img tags in the body and summary of
|
||||
## stories. Images will be converted to jpg for size if possible.
|
||||
## include_images is *only* available in epub and html output formats.
|
||||
## include_images is *not* available in the web service in any format.
|
||||
#include_images:false
|
||||
|
||||
## This switch prevents FFDL from doing any processing on the images.
|
||||
## Usually they would be converted to jpg, resized and optionally made
|
||||
## grayscale.
|
||||
no_image_processing: true
|
||||
|
||||
## output background color--only used by html and epub (and ignored in
|
||||
## epub by many readers). Included below in output_css--will be
|
||||
## ignored if not in output_css.
|
||||
|
|
@ -305,7 +317,7 @@ output_css:
|
|||
|
||||
## include images from img tags in the body and summary of
|
||||
## stories. Images will be converted to jpg for size if possible.
|
||||
## include_images is *only* available in epub output format.
|
||||
## include_images is *only* available in epub and html output formats.
|
||||
#include_images:false
|
||||
|
||||
## If set, the first image found will be made the cover image. If
|
||||
|
|
|
|||
Loading…
Reference in a new issue