Make default_cover_image use ${title}, etc; add cover_exclusion_regexp feature.

This commit is contained in:
Jim Miller 2012-03-19 16:22:51 -05:00
parent 3a8d045e7a
commit 7c43ecb56d
5 changed files with 69 additions and 28 deletions

View file

@ -112,9 +112,9 @@ zip_output: false
## Can include directories. .zip will be added if not in name somewhere
zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip
## Normally, try to make the output file name 'safe' by removing
## invalid filename chars. Applies to both output_filename &
## zip_filename.
## Normally, try to make the filenames 'safe' by replacing invalid
## filename chars in the metadata values with '_'. Applies to
## default_cover_image, output_filename & zip_filename.
allow_unsafe_filename: false
## entries to make epub subjects and calibre tags
@ -259,9 +259,18 @@ output_css:
## Note that if you enable make_firstimage_cover in [epub], but want
## to use default_cover_image for a specific site, use the site:format
## section, for example: [www.ficwad.com:epub]
## default_cover_image is a python string Template string with
## ${title}, ${author} etc, same as titlepage_entries. Unless
## allow_unsafe_filename is true, invalid filename chars in the
## metadata fields will be replaced with '_'.
#default_cover_image:file:///C:/Users/username/Desktop/nook/images/icon.png
#default_cover_image:file:///C:/Users/username/Desktop/nook/images/${title}/icon.png
#default_cover_image:http://www.somesite.com/someimage.gif
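## Some sites include images that we don't ever want becoming the
## cover image. cover_exclusion_regexp is a regular expression that
## is searched for in each image URL; matching images are skipped
## when picking the cover.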
#cover_exclusion_regexp:/stories/999/images/.*?_trophy.png
## Resize images down to width, height, preserving aspect ratio.
## Nook size, with margin.
image_max_size: 580, 725
@ -311,9 +320,16 @@ extratags: FanFiction,Testing,HTML
#is_adult:true
[fanfiction.mugglenet.com]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In commandline version,
## this should go in your personal.ini, not defaults.ini.
## Some sites require login (or login for some rated stories). The
## program can prompt you, or you can save it in config. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#username:YourName
#password:yourpassword
## Some sites also require the user to confirm they are adult for
## adult content. In commandline version, this should go in your
## personal.ini, not defaults.ini.
#is_adult:true
[fanfiction.portkey.org]
@ -422,6 +438,10 @@ extratags:
## this should go in your personal.ini, not defaults.ini.
#is_adult:true
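## Some sites include images that we don't ever want becoming the
## cover image. cover_exclusion_regexp is a regular expression that
## is searched for in each image URL; matching images are skipped
## when picking the cover.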
cover_exclusion_regexp:/stories/999/images/.*?_trophy.png
[www.potionsandsnitches.net]
[www.siye.co.uk]

View file

@ -215,7 +215,9 @@ class BaseSiteAdapter(Configurable):
self.getConfig('default_cover_image'):
self.story.addImgUrl(self,
None,
self.getConfig('default_cover_image'),
#self.getConfig('default_cover_image'),
self.story.formatFileName(self.getConfig('default_cover_image'),
self.getConfig('allow_unsafe_filename')),
self._fetchUrlRaw,
cover=True)
return self.story
@ -287,7 +289,8 @@ class BaseSiteAdapter(Configurable):
# some pre-existing epubs have img tags that had src stripped off.
if img.has_key('src'):
img['longdesc']=img['src']
img['src']=self.story.addImgUrl(self,url,img['src'],fetch)
img['src']=self.story.addImgUrl(self,url,img['src'],fetch,
coverexclusion=self.getConfig('cover_exclusion_regexp'))
for attr in soup._getAttrMap().keys():
if attr not in acceptable_attributes:

View file

@ -17,6 +17,7 @@
import os, re
import urlparse
import string
from math import floor
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
@ -278,9 +279,23 @@ class Story:
"Chapters will be tuples of (title,html)"
return self.chapters
def formatFileName(self,template,allowunsafefilename=True):
    """
    Expand a string.Template pattern (${title}, ${author}, ...) with
    this story's metadata and return the result encoded as utf8.

    An empty template falls back to
    "${title}-${siteabbrev}_${storyId}${formatext}".

    When allowunsafefilename is false, every metadata value is re-read
    through getMetadata(), passed to removeAllEntities() and each run
    of characters outside the allowed set is replaced by a single '_'
    before substitution.  Only the metadata values are cleaned; the
    literal text of the template itself is used as given.
    """
    metadata = self.getAllMetadata()

    if allowunsafefilename:
        substitutions = metadata
    else:
        unsafe_run = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+")
        substitutions = dict(
            (key, unsafe_run.sub('_', removeAllEntities(self.getMetadata(key))))
            for key in metadata.keys())

    # fall back default:
    pattern_text = template or "${title}-${siteabbrev}_${storyId}${formatext}"
    return string.Template(pattern_text).substitute(substitutions).encode('utf8')
# pass fetch in from adapter in case we need the cookies collected
# as well as it's a base_story class method.
def addImgUrl(self,configurable,parenturl,url,fetch,cover=False):
def addImgUrl(self,configurable,parenturl,url,fetch,cover=False,coverexclusion=None):
url = url.strip() # ran across an image with a space in the
# src. Browser handled it, so we'd better, too.
@ -339,9 +354,14 @@ class Story:
else:
self.imgurls.append(imgurl)
# First image, copy not link because calibre will replace with its cover.
if len(self.imgurls)==1 and \
# Only if: No cover already AND
# make_firstimage_cover AND
# NOT never_make_cover AND
# either no coverexclusion OR coverexclusion doesn't match
if self.cover == None and \
configurable.getConfig('make_firstimage_cover') and \
not configurable.getConfig('never_make_cover'):
not configurable.getConfig('never_make_cover') and \
(not coverexclusion or not re.search(coverexclusion,imgurl)):
newsrc = "images/cover.%s"%ext
self.cover=newsrc
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})

View file

@ -18,7 +18,6 @@
import re
import os.path
import datetime
import string
import StringIO
import zipfile
from zipfile import ZipFile, ZIP_DEFLATED
@ -123,24 +122,10 @@ class BaseStoryWriter(Configurable):
return self.getBaseFileName()
def getBaseFileName(self):
return self.formatFileName(self.getConfig('output_filename'))
return self.story.formatFileName(self.getConfig('output_filename'),self.getConfig('allow_unsafe_filename'))
def getZipFileName(self):
return self.formatFileName(self.getConfig('zip_filename'))
def formatFileName(self,template):
    """
    Expand a string.Template pattern (${title}, ${siteabbrev}, ...)
    with the metadata of self.story and return it encoded as utf8.

    An empty template falls back to
    "${title}-${siteabbrev}_${storyId}${formatext}".  Unless the
    allow_unsafe_filename config value is true, each metadata value is
    run through removeAllEntities() and every run of characters
    outside the allowed set becomes a single '_'.
    """
    story = self.story
    metadata = story.getAllMetadata()

    # fall back default:
    if not template:
        template = "${title}-${siteabbrev}_${storyId}${formatext}"

    if self.getConfig('allow_unsafe_filename'):
        replacements = metadata
    else:
        unsafe_run = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+")

        def scrub(key):
            # clean one metadata value for use inside a file name.
            return re.sub(unsafe_run, '_', removeAllEntities(story.getMetadata(key)))

        replacements = {}
        for key in metadata.keys():
            replacements[key] = scrub(key)

    filled = string.Template(template).substitute(replacements)
    return filled.encode('utf8')
return self.story.formatFileName(self.getConfig('zip_filename'),self.getConfig('allow_unsafe_filename'))
def _write(self, out, text):
out.write(text.encode('utf8'))

View file

@ -231,9 +231,18 @@ output_css:
## Note that if you enable make_firstimage_cover in [epub], but want
## to use default_cover_image for a specific site, use the site:format
## section, for example: [www.ficwad.com:epub]
## default_cover_image is a python string Template string with
## ${title}, ${author} etc, same as titlepage_entries. Unless
## allow_unsafe_filename is true, invalid filename chars in the
## metadata fields will be replaced with '_'.
#default_cover_image:file:///C:/Users/username/Desktop/nook/images/icon.png
#default_cover_image:file:///C:/Users/username/Desktop/nook/images/${title}/icon.png
#default_cover_image:http://www.somesite.com/someimage.gif
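## Some sites include images that we don't ever want becoming the
## cover image. cover_exclusion_regexp is a regular expression that
## is searched for in each image URL; matching images are skipped
## when picking the cover.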
#cover_exclusion_regexp:/stories/999/images/.*?_trophy.png
## Resize images down to width, height, preserving aspect ratio.
## Nook size, with margin.
image_max_size: 580, 725
@ -391,6 +400,10 @@ extratags:
## this should go in your personal.ini, not defaults.ini.
#is_adult:true
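## Some sites include images that we don't ever want becoming the
## cover image. cover_exclusion_regexp is a regular expression that
## is searched for in each image URL; matching images are skipped
## when picking the cover.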
cover_exclusion_regexp:/stories/999/images/.*?_trophy.png
[www.potionsandsnitches.net]
[www.siye.co.uk]