diff --git a/defaults.ini b/defaults.ini index cf95d156..858f6875 100644 --- a/defaults.ini +++ b/defaults.ini @@ -112,9 +112,9 @@ zip_output: false ## Can include directories. .zip will be added if not in name somewhere zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip -## Normally, try to make the output file name 'safe' by removing -## invalid filename chars. Applies to both output_filename & -## zip_filename. +## Normally, try to make the filenames 'safe' by removing invalid +## filename chars. Applies to default_cover_image, output_filename & +## zip_filename. allow_unsafe_filename: false ## entries to make epub subjects and calibre tags @@ -259,9 +259,18 @@ output_css: ## Note that if you enable make_firstimage_cover in [epub], but want ## to use default_cover_image for a specific site, use the site:format ## section, for example: [www.ficwad.com:epub] +## default_cover_image is a Python string.Template string with +## ${title}, ${author} etc, same as titlepage_entries. Unless +## allow_unsafe_filename is true, invalid filename chars in the +## metadata values are replaced with '_'. #default_cover_image:file:///C:/Users/username/Desktop/nook/images/icon.png +#default_cover_image:file:///C:/Users/username/Desktop/nook/images/${title}/icon.png #default_cover_image:http://www.somesite.com/someimage.gif +## Some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +#cover_exclusion_regexp:/stories/999/images/.*?_trophy.png + ## Resize images down to width, height, preserving aspect ratio. ## Nook size, with margin. image_max_size: 580, 725 @@ -311,9 +320,16 @@ extratags: FanFiction,Testing,HTML #is_adult:true [fanfiction.mugglenet.com] -## Some sites do not require a login, but do require the user to -## confirm they are adult for adult content. In commandline version, -## this should go in your personal.ini, not defaults.ini. 
+## Some sites require login (or login for some rated stories). The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. #is_adult:true [fanfiction.portkey.org] @@ -422,6 +438,10 @@ extratags: ## this should go in your personal.ini, not defaults.ini. #is_adult:true +## Some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/stories/999/images/.*?_trophy.png + [www.potionsandsnitches.net] [www.siye.co.uk] diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index ddf3543b..35d6d035 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -215,7 +215,9 @@ class BaseSiteAdapter(Configurable): self.getConfig('default_cover_image'): self.story.addImgUrl(self, None, - self.getConfig('default_cover_image'), + #self.getConfig('default_cover_image'), + self.story.formatFileName(self.getConfig('default_cover_image'), + self.getConfig('allow_unsafe_filename')), self._fetchUrlRaw, cover=True) return self.story @@ -287,7 +289,8 @@ class BaseSiteAdapter(Configurable): # some pre-existing epubs have img tags that had src stripped off. 
if img.has_key('src'): img['longdesc']=img['src'] - img['src']=self.story.addImgUrl(self,url,img['src'],fetch) + img['src']=self.story.addImgUrl(self,url,img['src'],fetch, + coverexclusion=self.getConfig('cover_exclusion_regexp')) for attr in soup._getAttrMap().keys(): if attr not in acceptable_attributes: diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index 298b205c..555cde37 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -17,6 +17,7 @@ import os, re import urlparse +import string from math import floor from htmlcleanup import conditionalRemoveEntities, removeAllEntities @@ -278,9 +279,23 @@ class Story: "Chapters will be tuples of (title,html)" return self.chapters + def formatFileName(self,template,allowunsafefilename=True): + values = origvalues = self.getAllMetadata() + # fall back default: + if not template: + template="${title}-${siteabbrev}_${storyId}${formatext}" + + if not allowunsafefilename: + values={} + pattern = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+") + for k in origvalues.keys(): + values[k]=re.sub(pattern,'_', removeAllEntities(self.getMetadata(k))) + + return string.Template(template).substitute(values).encode('utf8') + # pass fetch in from adapter in case we need the cookies collected # as well as it's a base_story class method. - def addImgUrl(self,configurable,parenturl,url,fetch,cover=False): + def addImgUrl(self,configurable,parenturl,url,fetch,cover=False,coverexclusion=None): url = url.strip() # ran across an image with a space in the # src. Browser handled it, so we'd better, too. @@ -339,9 +354,14 @@ class Story: else: self.imgurls.append(imgurl) # First image, copy not link because calibre will replace with it's cover. 
- if len(self.imgurls)==1 and \ + # Only if: No cover already AND + # make_firstimage_cover AND + # NOT never_make_cover AND + # either no coverexclusion OR coverexclusion doesn't match + if self.cover == None and \ configurable.getConfig('make_firstimage_cover') and \ - not configurable.getConfig('never_make_cover'): + not configurable.getConfig('never_make_cover') and \ + (not coverexclusion or not re.search(coverexclusion,imgurl)): newsrc = "images/cover.%s"%ext self.cover=newsrc self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data}) diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index 2d2a3600..f09de620 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -18,7 +18,6 @@ import re import os.path import datetime -import string import StringIO import zipfile from zipfile import ZipFile, ZIP_DEFLATED @@ -123,24 +122,10 @@ class BaseStoryWriter(Configurable): return self.getBaseFileName() def getBaseFileName(self): - return self.formatFileName(self.getConfig('output_filename')) + return self.story.formatFileName(self.getConfig('output_filename'),self.getConfig('allow_unsafe_filename')) def getZipFileName(self): - return self.formatFileName(self.getConfig('zip_filename')) - - def formatFileName(self,template): - values = origvalues = self.story.getAllMetadata() - # fall back default: - if not template: - template="${title}-${siteabbrev}_${storyId}${formatext}" - - if not self.getConfig('allow_unsafe_filename'): - values={} - pattern = re.compile(r"[^a-zA-Z0-9_\. 
\[\]\(\)&'-]+") - for k in origvalues.keys(): - values[k]=re.sub(pattern,'_', removeAllEntities(self.story.getMetadata(k))) - - return string.Template(template).substitute(values).encode('utf8') + return self.story.formatFileName(self.getConfig('zip_filename'),self.getConfig('allow_unsafe_filename')) def _write(self, out, text): out.write(text.encode('utf8')) diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 9c12edb5..60cd9a41 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -231,9 +231,18 @@ output_css: ## Note that if you enable make_firstimage_cover in [epub], but want ## to use default_cover_image for a specific site, use the site:format ## section, for example: [www.ficwad.com:epub] +## default_cover_image is a Python string.Template string with +## ${title}, ${author} etc, same as titlepage_entries. Unless +## allow_unsafe_filename is true, invalid filename chars in the +## metadata values are replaced with '_'. #default_cover_image:file:///C:/Users/username/Desktop/nook/images/icon.png +#default_cover_image:file:///C:/Users/username/Desktop/nook/images/${title}/icon.png #default_cover_image:http://www.somesite.com/someimage.gif +## Some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +#cover_exclusion_regexp:/stories/999/images/.*?_trophy.png + ## Resize images down to width, height, preserving aspect ratio. ## Nook size, with margin. image_max_size: 580, 725 @@ -391,6 +400,10 @@ extratags: ## this should go in your personal.ini, not defaults.ini. #is_adult:true +## Some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/stories/999/images/.*?_trophy.png + [www.potionsandsnitches.net] [www.siye.co.uk]