diff --git a/app.yaml b/app.yaml index e233ad0f..85f91af6 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # fanfictionloader ffd-retief application: fanfictionloader -version: 4-0-2 +version: 4-0-3 runtime: python api_version: 1 diff --git a/downloader.py b/downloader.py index b9e4b73f..91d599a0 100644 --- a/downloader.py +++ b/downloader.py @@ -20,9 +20,12 @@ import logging logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") import sys, os +from StringIO import StringIO from optparse import OptionParser import getpass +from epubmerge import doMerge + if sys.version_info < (2, 5): print "This program requires Python 2.5 or newer." sys.exit(1) @@ -31,9 +34,9 @@ from fanficdownloader import adapters,writers,exceptions import ConfigParser -def writeStory(config,adapter,writeformat): +def writeStory(config,adapter,writeformat,metaonly=False,outstream=None): writer = writers.getWriter(writeformat,config,adapter) - writer.writeStory() + writer.writeStory(outstream=outstream,metaonly=metaonly) del writer def main(): @@ -41,25 +44,35 @@ def main(): # read in args, anything starting with -- will be treated as --= usage = "usage: %prog [options] storyurl" parser = OptionParser(usage) - parser.add_option("-f", "--format", dest="format", default='epub', + parser.add_option("-f", "--format", dest="format", default="epub", help="write story as FORMAT, epub(default), text or html", metavar="FORMAT") + parser.add_option("-b", "--begin", dest="begin", default=None, + help="Begin with Chapter START", metavar="START") + parser.add_option("-e", "--end", dest="end", default=None, + help="End with Chapter END", metavar="END") parser.add_option("-o", "--option", action="append", dest="options", help="set an option NAME=VALUE", metavar="NAME=VALUE") parser.add_option("-m", "--meta-only", action="store_true", dest="metaonly", - help="Retrieve metadata and stop",) + help="Retrieve metadata and stop. Write title_page only epub if epub.",) + parser.add_option("-u", "--update-epub", + action="store_true", dest="update", + help="Update an existing epub with new chapter, give epub filename instead of storyurl. Not compatible with inserted TOC.",) (options, args) = parser.parse_args() if len(args) != 1: parser.error("incorrect number of arguments") + if options.update and options.format != 'epub': + parser.error("-u/--update-epub only works with epub") + config = ConfigParser.SafeConfigParser() - logging.debug('reading defaults.ini config file, if present') + #logging.debug('reading defaults.ini config file, if present') config.read('defaults.ini') - logging.debug('reading personal.ini config file, if present') + #logging.debug('reading personal.ini config file, if present') config.read('personal.ini') try: @@ -72,7 +85,19 @@ def main(): config.set("overrides",var,val) try: - adapter = adapters.getAdapter(config,args[0]) + ## Attempt to update an existing epub. 
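For orientation, the new downloader.py switches above are meant to be exercised roughly like this (STORYURL and the epub name are placeholders; -u only works together with the epub format, as the parser.error() check above enforces):

    python downloader.py -f epub -b 3 -e 7 STORYURL   # download only chapters 3 through 7
    python downloader.py -m STORYURL                  # metadata only; epub output becomes a title-page-only file
    python downloader.py -u MyStory.epub              # re-fetch and append chapters added since MyStory.epub was built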
+ if options.update: + updateio = StringIO() + (url,chaptercount) = doMerge(updateio, + args, + titlenavpoints=False, + striptitletoc=True, + forceunique=False) + print "Updating %s, URL: %s" % (args[0],url) + else: + url = args[0] + + adapter = adapters.getAdapter(config,url) try: adapter.getStoryMetadataOnly() @@ -89,19 +114,63 @@ def main(): adapter.is_adult=True adapter.getStoryMetadataOnly() - if options.metaonly: - print adapter.getStoryMetadataOnly() - return + if options.update: + urlchaptercount = int(adapter.getStoryMetadataOnly().getMetadata('numChapters')) + + if chaptercount == urlchaptercount: + print "%s already contains %d chapters." % (args[0],chaptercount) + elif chaptercount > urlchaptercount: + print "%s contains %d chapters, more than source: %d." % (args[0],chaptercount,urlchaptercount) + else: + print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount) + ## Get updated title page/metadata by itself in an epub. + ## Even if the title page isn't included, this carries the metadata. + titleio = StringIO() + writeStory(config,adapter,"epub",metaonly=True,outstream=titleio) + + ## Go get the new chapters only in another epub. + newchaptersio = StringIO() + adapter.setChaptersRange(chaptercount+1,urlchaptercount) + config.set("overrides",'include_tocpage','false') + config.set("overrides",'include_titlepage','false') + writeStory(config,adapter,"epub",outstream=newchaptersio) + + # out = open("testing/titleio.epub","wb") + # out.write(titleio.getvalue()) + # out.close() + + # out = open("testing/updateio.epub","wb") + # out.write(updateio.getvalue()) + # out.close() + + # out = open("testing/newchaptersio.epub","wb") + # out.write(newchaptersio.getvalue()) + # out.close() + + ## Merge the three epubs together. + doMerge(args[0], + [titleio,updateio,newchaptersio], + fromfirst=True, + titlenavpoints=False, + striptitletoc=False, + forceunique=False) - if options.format == "all": - ## For testing. Doing all three formats actually causes - ## some interesting config issues with format-specific - ## sections. But it should rarely be an issue. - writeStory(config,adapter,"epub") - writeStory(config,adapter,"html") - writeStory(config,adapter,"txt") else: - writeStory(config,adapter,options.format) + # regular download + if options.metaonly: + print adapter.getStoryMetadataOnly() + + adapter.setChaptersRange(options.begin,options.end) + + if options.format == "all": + ## For testing. Doing all three formats actually causes + ## some interesting config issues with format-specific + ## sections. But it should rarely be an issue. + writeStory(config,adapter,"epub",options.metaonly) + writeStory(config,adapter,"html",options.metaonly) + writeStory(config,adapter,"txt",options.metaonly) + else: + writeStory(config,adapter,options.format,options.metaonly) del adapter diff --git a/epubmerge.py b/epubmerge.py index 9e518060..f72e7add 100644 --- a/epubmerge.py +++ b/epubmerge.py @@ -18,73 +18,108 @@ # limitations under the License. 
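Before the epubmerge changes themselves: the update path above works because doMerge() (below) accepts either plain file names or in-memory file-like objects on both its input and output sides. A minimal sketch of that round trip, in the same Python 2 style as the codebase (file names hypothetical; the first input is expected to carry a dc:source element, as the epubs this downloader writes are):

    from StringIO import StringIO
    from epubmerge import doMerge

    part1 = StringIO(open("part1.epub", "rb").read())
    part2 = StringIO(open("part2.epub", "rb").read())
    merged = StringIO()
    doMerge(merged, [part1, part2])          # merged epub ends up in the StringIO
    open("merged.epub", "wb").write(merged.getvalue())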
import sys -import getopt import os +import re +#import StringIO +from optparse import OptionParser import zlib import zipfile from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED from time import time -from xml.dom.minidom import parse, parseString, getDOMImplementation - -def usage(): - print "epubmerge 1.0 Merges multiple epub format ebooks together" - print "\nUsage: " + sys.argv[0]+" [options] [ ...]\n" - print " Options:" - print " -h --help" - print " -o --output= Default: merge.epub" - print " -t --title= Default: ' Anthology'" - print " -a --author= Default: " - print " Multiple authors may be given." - -def main(): - try: - opts, args = getopt.getopt(sys.argv[1:], "t:a:o:h", ["title=","author=", "output=","help"]) - except getopt.GetoptError, err: - # print help information and exit: - print str(err) # will print something like "option -a not recognized" - usage() - sys.exit(2) +from exceptions import KeyError - if( len(args) < 1 ): - usage() - sys.exit() +from xml.dom.minidom import parse, parseString, getDOMImplementation - outputopt = "merge.epub" - titleopt = None - authoropts = [] # list of strings +def main(argv): + # read in args, anything starting with -- will be treated as --= + usage = "usage: %prog [options] [...]" + parser = OptionParser(usage) + parser.add_option("-o", "--output", dest="outputopt", default="merge.epub", + help="Set OUTPUT file, Default: merge.epub", metavar="OUTPUT") + parser.add_option("-t", "--title", dest="titleopt", default=None, + help="Use TITLE as the metadata title. Default: ' Anthology'", metavar="TITLE") + parser.add_option("-d", "--description", dest="descopt", default=None, + help="Use DESC as the metadata description. Default: ' by ' for each epub.", metavar="DESC") + parser.add_option("-a", "--author", + action="append", dest="authoropts", default=[], + help="Use AUTHOR as a metadata author, multiple authors may be given, Default: ", metavar="AUTHOR") + parser.add_option("-f", "--first", + action="store_true", dest="fromfirst", default=False, + help="Take all metadata from first input epub",) + parser.add_option("-n", "--titles-in-toc", + action="store_true", dest="titlenavpoints", + help="Put an entry in the TOC for each epub, in addition to each epub's chapters.",) + parser.add_option("-s", "--strip-title-toc", + action="store_true", dest="striptitletoc", + help="Strip any title_page.xhtml and toc_page.xhtml files.",) - for o, a in opts: - if o in ("-h", "--help"): - usage() - sys.exit() - elif o in ("-t", "--title"): - titleopt = a - elif o in ("-a", "--author"): - authoropts.append(a) - elif o in ("-o", "--output"): - outputopt = a - else: - assert False, "unhandled option" + (options, args) = parser.parse_args() ## Add .epub if not already there. 
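Taken together, the OptionParser setup above is meant to support invocations like these (file names and title are made up):

    # anthology merge: -t overrides the default "<first book title> Anthology" title,
    # -n adds a TOC entry per input book in addition to each book's own chapters
    python epubmerge.py -o anthology.epub -t "Collected Stories" -n book1.epub book2.epub book3.epub

    # take all metadata from the first input (-f) and drop any title_page/toc_page files (-s)
    python epubmerge.py -f -s -o combined.epub first.epub more1.epub more2.epub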
- if( not outputopt.lower().endswith(".epub") ): - outputopt=outputopt+".epub" + if not options.outputopt.lower().endswith(".epub"): + options.outputopt=options.outputopt+".epub" - print "output file: "+outputopt - + print "output file: "+options.outputopt + doMerge(options.outputopt, + args, + options.authoropts, + options.titleopt, + options.descopt, + options.fromfirst, + options.titlenavpoints, + options.striptitletoc) + + # output = StringIO.StringIO() + # files = [] + # for file in args: + # f = open(file,"rb") + # fio = StringIO.StringIO(f.read()) + # f.close() + # files.append(fio) + + # doMerge(output,files,authoropts,titleopt,descopt,fromfirst,titlenavpoints,striptitletoc) + + # out = open(outputopt,"wb") + # out.write(output.getvalue()) + +def doMerge(outputio,files,authoropts=[],titleopt=None,descopt=None, + fromfirst=False, + titlenavpoints=True, + striptitletoc=False, + forceunique=True): + ''' + outputio = output file name or StringIO. + files = list of input file names or StringIOs. + authoropts = list of authors to use, otherwise add from all input + titleopt = title, otherwise ' Anthology' + descopt = description, otherwise ' by <author>' list for all input + fromfirst if true, take all metadata (including author, title, desc) from first input + titlenavpoints if true, put in a new TOC entry for each epub + striptitletoc if true, strip out any (title|toc)_page.xhtml files + forceunique if true, guarantee uniqueness of contents by adding a dir for each input + ''' + ## Python 2.5 ZipFile is rather more primative than later + ## versions. It can operate on a file, or on a StringIO, but + ## not on an open stream. OTOH, I suspect we would have had + ## problems with closing and opening again to change the + ## compression type anyway. + + filecount=0 + source=None + ## Write mimetype file, must be first and uncompressed. ## Older versions of python(2.4/5) don't allow you to specify ## compression by individual file. ## Overwrite if existing output file. - outputepub = ZipFile(outputopt, "w", compression=ZIP_STORED) + outputepub = ZipFile(outputio, "w", compression=ZIP_STORED) outputepub.debug = 3 outputepub.writestr("mimetype", "application/epub+zip") outputepub.close() ## Re-open file for content. - outputepub = ZipFile(outputopt, "a", compression=ZIP_DEFLATED) + outputepub = ZipFile(outputio, "a", compression=ZIP_DEFLATED) outputepub.debug = 3 ## Create META-INF/container.xml file. The only thing it does is @@ -109,13 +144,21 @@ def main(): booktitles = [] # list of strings -- Each book's title allauthors = [] # list of lists of strings -- Each book's list of authors. + + filelist = [] booknum=1 - for filename in args: - print "input file: "+filename + firstmetadom = None + for file in files: book = "%d" % booknum + bookdir = "" + bookid = "" + if forceunique: + bookdir = "%d/" % booknum + bookid = "a%d" % booknum + #print "book %d" % booknum - epub = ZipFile(filename, 'r') + epub = ZipFile(file, 'r') ## Find the .opf file. 
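Reading the new keyword arguments together (defaults as in the docstring above; the archive paths below are illustrative, assuming the usual OEBPS layout):

    # titleopt=None  -> merged title falls back to "<first book title> Anthology"
    # descopt=None   -> description is generated as one "<title> by <author>" line per input
    # fromfirst=True -> the author/title/description options are ignored; all metadata
    #                   is taken from the first input epub instead
    # forceunique=True  -> each input is copied under its own numeric directory, e.g.
    #                      1/OEBPS/file0001.xhtml (id a1...) and 2/OEBPS/file0001.xhtml (id a2...)
    # forceunique=False -> paths and ids are kept as-is and duplicate hrefs are skipped,
    #                      which is what the chapter-update path in downloader.py relies on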
container = epub.read("META-INF/container.xml") @@ -129,6 +172,10 @@ def main(): relpath=relpath+"/" metadom = parseString(epub.read(rootfilename)) + if booknum==1: + firstmetadom = metadom.getElementsByTagName("metadata")[0] + source=firstmetadom.getElementsByTagName("dc:source")[0].firstChild.data.encode("utf-8") + #print "Source:%s"%source ## Save indiv book title booktitles.append(metadom.getElementsByTagName("dc:title")[0].firstChild.data) @@ -147,22 +194,33 @@ def main(): tocdom = parseString(epub.read(relpath+item.getAttribute("href"))) for navpoint in tocdom.getElementsByTagName("navPoint"): - navpoint.setAttribute("id","a"+book+navpoint.getAttribute("id")) + navpoint.setAttribute("id",bookid+navpoint.getAttribute("id")) for content in tocdom.getElementsByTagName("content"): - content.setAttribute("src",book+"/"+relpath+content.getAttribute("src")) + content.setAttribute("src",bookdir+relpath+content.getAttribute("src")) navmaps.append(tocdom.getElementsByTagName("navMap")[0]) else: - id="a"+book+item.getAttribute("id") - href=book+"/"+relpath+item.getAttribute("href") + id=bookid+item.getAttribute("id") + href=bookdir+relpath+item.getAttribute("href") href=href.encode('utf8') - items.append((id,href,item.getAttribute("media-type"))) - outputepub.writestr(href, - epub.read(relpath+item.getAttribute("href"))) + #print "href:"+href + if not striptitletoc or not re.match(r'.*/(title|toc)_page\.xhtml', + item.getAttribute("href")): + if href not in filelist: + try: + outputepub.writestr(href, + epub.read(relpath+item.getAttribute("href"))) + if re.match(r'.*/file\d+\.xhtml',href): + filecount+=1 + items.append((id,href,item.getAttribute("media-type"))) + filelist.append(href) + except KeyError, ke: + pass # Skip missing files. for itemref in metadom.getElementsByTagName("itemref"): - itemrefs.append("a"+book+itemref.getAttribute("idref")) + if not striptitletoc or not re.match(r'(title|toc)_page', itemref.getAttribute("idref")): + itemrefs.append(bookid+itemref.getAttribute("idref")) booknum=booknum+1; @@ -170,41 +228,52 @@ def main(): uniqueid="epubmerge-uid-%d" % time() # real sophisticated uid scheme. contentdom = getDOMImplementation().createDocument(None, "package", None) package = contentdom.documentElement - package.setAttribute("version","2.0") - package.setAttribute("xmlns","http://www.idpf.org/2007/opf") - package.setAttribute("unique-identifier","epubmerge-id") - metadata=newTag(contentdom,"metadata", - attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/", - "xmlns:opf":"http://www.idpf.org/2007/opf"}) - package.appendChild(metadata) - metadata.appendChild(newTag(contentdom,"dc:identifier",text=uniqueid,attrs={"id":"epubmerge-id"})) - if( titleopt is None ): - titleopt = booktitles[0]+" Anthology" - metadata.appendChild(newTag(contentdom,"dc:title",text=titleopt)) - - # If cmdline authors, use those instead of those collected from the epubs - # (allauthors kept for TOC & description gen below. 
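The dc:source lookup and the fileNNNN.xhtml counting above are what feed doMerge()'s new return value; downloader.py's update path is built on that contract. A minimal sketch (epub name hypothetical; the first input must carry a dc:source element, as the epubs this downloader writes do):

    from StringIO import StringIO
    from epubmerge import doMerge

    updateio = StringIO()
    (url, chaptercount) = doMerge(updateio, ["MyStory.epub"],
                                  titlenavpoints=False,
                                  striptitletoc=True,
                                  forceunique=False)
    # url          -- dc:source of the first input, i.e. the original story URL
    # chaptercount -- number of OEBPS/fileNNNN.xhtml chapter files copied into updateio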
- if( len(authoropts) > 1 ): - useauthors=[authoropts] + if fromfirst and firstmetadom: + metadata = firstmetadom + firstpackage = firstmetadom.parentNode + package.setAttribute("version",firstpackage.getAttribute("version")) + package.setAttribute("xmlns",firstpackage.getAttribute("xmlns")) + package.setAttribute("unique-identifier",firstpackage.getAttribute("unique-identifier")) else: - useauthors=allauthors - - usedauthors=dict() - for authorlist in useauthors: - for author in authorlist: - if( not usedauthors.has_key(author) ): - usedauthors[author]=author - metadata.appendChild(newTag(contentdom,"dc:creator", - attrs={"opf:role":"aut"}, - text=author)) - - metadata.appendChild(newTag(contentdom,"dc:contributor",text="epubmerge",attrs={"opf:role":"bkp"})) - metadata.appendChild(newTag(contentdom,"dc:rights",text="Copyrights as per source stories")) - metadata.appendChild(newTag(contentdom,"dc:language",text="en")) - - # created now, but not filled in until TOC generation to save loops. - description = newTag(contentdom,"dc:description",text="Anthology containing:\n") - metadata.appendChild(description) + package.setAttribute("version","2.0") + package.setAttribute("xmlns","http://www.idpf.org/2007/opf") + package.setAttribute("unique-identifier","epubmerge-id") + metadata=newTag(contentdom,"metadata", + attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/", + "xmlns:opf":"http://www.idpf.org/2007/opf"}) + metadata.appendChild(newTag(contentdom,"dc:identifier",text=uniqueid,attrs={"id":"epubmerge-id"})) + if( titleopt is None ): + titleopt = booktitles[0]+" Anthology" + metadata.appendChild(newTag(contentdom,"dc:title",text=titleopt)) + + # If cmdline authors, use those instead of those collected from the epubs + # (allauthors kept for TOC & description gen below. + if( len(authoropts) > 1 ): + useauthors=[authoropts] + else: + useauthors=allauthors + + usedauthors=dict() + for authorlist in useauthors: + for author in authorlist: + if( not usedauthors.has_key(author) ): + usedauthors[author]=author + metadata.appendChild(newTag(contentdom,"dc:creator", + attrs={"opf:role":"aut"}, + text=author)) + + metadata.appendChild(newTag(contentdom,"dc:contributor",text="epubmerge",attrs={"opf:role":"bkp"})) + metadata.appendChild(newTag(contentdom,"dc:rights",text="Copyrights as per source stories")) + metadata.appendChild(newTag(contentdom,"dc:language",text="en")) + + if not descopt: + # created now, but not filled in until TOC generation to save loops. + description = newTag(contentdom,"dc:description",text="Anthology containing:\n") + else: + description = newTag(contentdom,"dc:description",text=descopt) + metadata.appendChild(description) + + package.appendChild(metadata) manifest = contentdom.createElement("manifest") package.appendChild(manifest) @@ -245,24 +314,29 @@ def main(): tocnavMap = tocncxdom.createElement("navMap") ncx.appendChild(tocnavMap) - ## TOC navPoints can ge nested, but this flattens them for + ## TOC navPoints can be nested, but this flattens them for ## simplicity, plus adds a navPoint for each epub. 
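For reference, the newTag() helper used throughout the metadata block above (and defined near the end of this file) is a thin wrapper over minidom; for example (author name made up):

    from xml.dom.minidom import getDOMImplementation
    from epubmerge import newTag

    dom = getDOMImplementation().createDocument(None, "package", None)
    creator = newTag(dom, "dc:creator", attrs={"opf:role": "aut"}, text="Some Author")
    print creator.toxml()   # <dc:creator opf:role="aut">Some Author</dc:creator>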
booknum=0 for navmap in navmaps: navpoints = navmap.getElementsByTagName("navPoint") - ## Copy first navPoint of each epub, give a different id and - ## text: bookname by authorname - newnav = navpoints[0].cloneNode(True) - newnav.setAttribute("id","book"+newnav.getAttribute("id")) - ## For purposes of TOC titling & desc, use first book author - newtext = newTag(tocncxdom,"text",text=booktitles[booknum]+" by "+allauthors[booknum][0]) - description.appendChild(contentdom.createTextNode(booktitles[booknum]+" by "+allauthors[booknum][0]+"\n")) - text = newnav.getElementsByTagName("text")[0] - text.parentNode.replaceChild(newtext,text) - tocnavMap.appendChild(newnav) + if titlenavpoints: + ## Copy first navPoint of each epub, give a different id and + ## text: bookname by authorname + newnav = navpoints[0].cloneNode(True) + newnav.setAttribute("id","book"+newnav.getAttribute("id")) + ## For purposes of TOC titling & desc, use first book author + newtext = newTag(tocncxdom,"text",text=booktitles[booknum]+" by "+allauthors[booknum][0]) + text = newnav.getElementsByTagName("text")[0] + text.parentNode.replaceChild(newtext,text) + tocnavMap.appendChild(newnav) + if not descopt and not fromfirst: + description.appendChild(contentdom.createTextNode(booktitles[booknum]+" by "+allauthors[booknum][0]+"\n")) + for navpoint in navpoints: - tocnavMap.appendChild(navpoint) + #print "navpoint:%s"%navpoint.getAttribute("id") + if not striptitletoc or not re.match(r'(title|toc)_page',navpoint.getAttribute("id")): + tocnavMap.appendChild(navpoint) booknum=booknum+1; ## Force strict ordering of playOrder @@ -283,6 +357,8 @@ def main(): zf.create_system = 0 outputepub.close() + return (source,filecount) + ## Utility method for creating new tags. def newTag(dom,name,attrs=None,text=None): tag = dom.createElement(name) @@ -294,4 +370,4 @@ def newTag(dom,name,attrs=None,text=None): return tag if __name__ == "__main__": - main() + main(sys.argv[1:]) diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index bfb8dbe0..da3f5d12 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -75,7 +75,6 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" self.story.setMetadata('dateUpdated',datetime.datetime.now()) else: self.story.setMetadata('dateUpdated',makeDate("1975-01-31","%Y-%m-%d")) - self.story.setMetadata('numChapters','5') self.story.setMetadata('numWords','123456') self.story.setMetadata('status','In-Completed') self.story.setMetadata('rating','Tweenie') @@ -99,17 +98,39 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" 
('Chapter 1, Xenos on Cinnabar',self.url+"&chapter=2"), ('Chapter 2, Sinmay on Kintikin',self.url+"&chapter=3"), ('Chapter 3, Over Cinnabar',self.url+"&chapter=4"), - ('Epilogue',self.url+"&chapter=5")] + ('Chapter 4',self.url+"&chapter=5"), + ('Chapter 5',self.url+"&chapter=6"), + ('Chapter 6',self.url+"&chapter=6"), + ('Chapter 7',self.url+"&chapter=6"), + ('Chapter 8',self.url+"&chapter=6"), + ('Chapter 9',self.url+"&chapter=6"), + ('Chapter 0',self.url+"&chapter=6"), + ('Chapter a',self.url+"&chapter=6"), + ('Chapter b',self.url+"&chapter=6"), + ('Chapter c',self.url+"&chapter=6"), + ('Chapter d',self.url+"&chapter=6"), + ('Chapter e',self.url+"&chapter=6"), + ('Chapter f',self.url+"&chapter=6"), + ('Chapter g',self.url+"&chapter=6"), + ('Chapter h',self.url+"&chapter=6"), + ('Chapter i',self.url+"&chapter=6"), + ('Chapter j',self.url+"&chapter=6"), + ('Chapter k',self.url+"&chapter=6"), + ('Chapter l',self.url+"&chapter=6"), + ('Chapter m',self.url+"&chapter=6"), + ('Chapter n',self.url+"&chapter=6"), + ] + self.story.setMetadata('numChapters',len(self.chapterUrls)) def getChapterText(self, url): + logging.debug('Getting chapter text from: %s' % url) if self.story.getMetadata('storyId') == '667': raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url) if self.story.getMetadata('storyId') == '670': time.sleep(2.0) - if "chapter=1" in url : text=u''' <div> diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 0fdf1178..406a3d7c 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -69,6 +69,8 @@ class BaseSiteAdapter(Configurable): self.story.setMetadata('site',self.getSiteDomain()) self.story.setMetadata('dateCreated',datetime.datetime.now()) self.chapterUrls = [] # tuples of (chapter title,chapter url) + self.chapterFirst = None + self.chapterLast = None ## order of preference for decoding. self.decode = ["utf8", "Windows-1252"] # 1252 is a superset of @@ -135,14 +137,26 @@ class BaseSiteAdapter(Configurable): logging.error("Giving up on %s" %url) logging.exception(excpt) raise(excpt) - + + # Limit chapters to download. Input starts at 1, list starts at 0 + def setChaptersRange(self,first=None,last=None): + if first: + self.chapterFirst=int(first)-1 + if last: + self.chapterLast=int(last)-1 + # Does the download the first time it's called. def getStory(self): if not self.storyDone: self.getStoryMetadataOnly() - for (title,url) in self.chapterUrls: - self.story.addChapter(removeEntities(title), - removeEntities(self.getChapterText(url))) + for index, (title,url) in enumerate(self.chapterUrls): + if (self.chapterFirst!=None and index < self.chapterFirst) or \ + (self.chapterLast!=None and index > self.chapterLast): + self.story.addChapter(removeEntities(title), + None) + else: + self.story.addChapter(removeEntities(title), + removeEntities(self.getChapterText(url))) self.storyDone = True return self.story diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index 085970d6..32c742a3 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -172,19 +172,21 @@ class BaseStoryWriter(Configurable): names as Story.metadata, but ENTRY should use index and chapter. """ # Only do TOC if there's more than one chapter and it's configured. 
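A note on the range handling added to BaseSiteAdapter above: chapters outside the requested window are still added to the story, just with None for their text, so chapter indices stay aligned with the full story. A sketch of what the writers then see (config and url obtained as in downloader.py; titles hypothetical):

    from fanficdownloader import adapters

    adapter = adapters.getAdapter(config, url)
    adapter.setChaptersRange(3, 4)        # 1-based, inclusive
    story = adapter.getStory()
    # story.getChapters() -> [("Chapter 1", None), ("Chapter 2", None),
    #                         ("Chapter 3", u"<div>...</div>"),
    #                         ("Chapter 4", u"<div>...</div>"),
    #                         ("Chapter 5", None)]
    # The writer changes below skip the None entries but keep index+1 numbering, so
    # chapter 3 still becomes OEBPS/file0003.xhtml -- which is what lets the
    # "new chapters only" epub from the update path slot in after the chapters
    # already present in the old file.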
- if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage"): + if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : self._write(out,START.substitute(self.story.metadata)) for index, (title,html) in enumerate(self.story.getChapters()): - self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)})) + if html: + self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)})) self._write(out,END.substitute(self.story.metadata)) # if no outstream is given, write to file. - def writeStory(self,outstream=None): + def writeStory(self,outstream=None,metaonly=False): for tag in self.getConfigList("extratags"): self.story.addToList("extratags",tag) + self.metaonly = metaonly outfilename=self.getOutputFileName() if not outstream: @@ -207,22 +209,24 @@ class BaseStoryWriter(Configurable): if fileupdated > lastupdated: print "File(%s) Updated(%s) more recently than Story(%s) - Skipping" % (outfilename,fileupdated,lastupdated) return - - self.story = self.adapter.getStory() # get full story now, - # just before - # writing. Fetch - # before opening - # file. + if not metaonly: + self.story = self.adapter.getStory() # get full story + # now, just + # before writing. + # Fetch before + # opening file. outstream = open(outfilename,"wb") else: close=False logging.debug("Save to stream") - self.story = self.adapter.getStory() # get full story now, - # just before writing. - # Okay if double called - # with above, it will - # only fetch once. + if not metaonly: + self.story = self.adapter.getStory() # get full story now, + # just before + # writing. Okay if + # double called with + # above, it will only + # fetch once. if self.getConfig('zip_output'): out = StringIO.StringIO() self.writeStoryImpl(out) diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index 3d7ce7ab..c8502684 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -292,16 +292,17 @@ h6 { text-align: center; } if self.getConfig("include_titlepage"): items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page")) itemrefs.append("title_page") - if self.getConfig("include_tocpage"): + if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents")) itemrefs.append("toc_page") for index, (title,html) in enumerate(self.story.getChapters()): - i=index+1 - items.append(("file%04d"%i, - "OEBPS/file%04d.xhtml"%i, - "application/xhtml+xml", - title)) - itemrefs.append("file%04d"%i) + if html: + i=index+1 + items.append(("file%04d"%i, + "OEBPS/file%04d.xhtml"%i, + "application/xhtml+xml", + title)) + itemrefs.append("file%04d"%i) manifest = contentdom.createElement("manifest") package.appendChild(manifest) @@ -407,14 +408,16 @@ h6 { text-align: center; } tocpageIO.close() for index, (title,html) in enumerate(self.story.getChapters()): - logging.debug('Writing chapter text for: %s' % title) - fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) - # ffnet(& maybe others) gives the whole chapter text as - # one line. 
This causes problems for nook(at least) when - # the chapter size starts getting big (200k+) - fullhtml = fullhtml.replace('</p>','</p>\n').replace('<br />','<br />\n') - outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8')) - del fullhtml + if html: + logging.debug('Writing chapter text for: %s' % title) + fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + # ffnet(& maybe others) gives the whole chapter text + # as one line. This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) + fullhtml = fullhtml.replace('</p>','</p>\n').replace('<br />','<br />\n') + outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8')) + del fullhtml # declares all the files created by Windows. otherwise, when # it runs in appengine, windows unzips the files as 000 perms. diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py index 758385fd..32d27d62 100644 --- a/fanficdownloader/writers/writer_html.py +++ b/fanficdownloader/writers/writer_html.py @@ -107,8 +107,9 @@ class HTMLWriter(BaseStoryWriter): self.HTML_TOC_PAGE_END) for index, (title,html) in enumerate(self.story.getChapters()): - logging.debug('Writing chapter text for: %s' % title) - self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)})) - self._write(out,html) + if html: + logging.debug('Writing chapter text for: %s' % title) + self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)})) + self._write(out,html) self._write(out,self.HTML_FILE_END.substitute(self.story.metadata)) diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py index 4dd98c6b..73b48afb 100644 --- a/fanficdownloader/writers/writer_mobi.py +++ b/fanficdownloader/writers/writer_mobi.py @@ -166,14 +166,16 @@ class MobiWriter(BaseStoryWriter): # tocpageIO.close() for index, (title,html) in enumerate(self.story.getChapters()): - logging.debug('Writing chapter text for: %s' % title) - fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) - # ffnet(& maybe others) gives the whole chapter text as - # one line. This causes problems for nook(at least) when - # the chapter size starts getting big (200k+) - fullhtml = fullhtml.replace('</p>','</p>\n').replace('<br />','<br />\n') - files.append(fullhtml.encode('utf-8')) - del fullhtml + if html: + logging.debug('Writing chapter text for: %s' % title) + fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + # ffnet(& maybe others) gives the whole chapter text + # as one line. This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) + fullhtml = fullhtml.replace('</p>','</p>\n').replace('<br />','<br />\n') + files.append(fullhtml.encode('utf-8')) + del fullhtml c = Converter(title=self.getMetadata('title'), author=self.getMetadata('author'), diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py index 6cbeba31..ccf46375 100644 --- a/fanficdownloader/writers/writer_txt.py +++ b/fanficdownloader/writers/writer_txt.py @@ -128,10 +128,10 @@ End file. 
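Stepping back from the per-writer None guards above: the other half of the writer changes is the metaonly flag. With metaonly=True the base writer never calls adapter.getStory(), so no chapter text is fetched at all. A sketch of driving it directly (config and adapter set up as in downloader.py):

    from StringIO import StringIO
    from fanficdownloader import writers

    titleio = StringIO()
    writer = writers.getWriter("epub", config, adapter)
    writer.writeStory(outstream=titleio, metaonly=True)   # no chapters downloaded
    # titleio now holds a title-page-only epub that still carries the story
    # metadata, which is what the update path merges in to refresh the title page.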
self._write(out,self.lineends(self.wraplines(towrap))) for index, (title,html) in enumerate(self.story.getChapters()): - logging.debug('Writing chapter text for: %s' % title) - self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1}))))) - - self._write(out,self.lineends(html2text(html))) + if html: + logging.debug('Writing chapter text for: %s' % title) + self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1}))))) + self._write(out,self.lineends(html2text(html))) self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.metadata)))) diff --git a/recent.html b/recent.html index 6baad0f7..6a7ef3dd 100644 --- a/recent.html +++ b/recent.html @@ -50,11 +50,11 @@ {% for fic in fics %} <p> {% if fic.completed %} - <span class="recent"><a href="/file?id={{ fic.key }}">Download {{ fic.title }}</a></span> + <span class="recent"><a href="/file?id={{ fic.key }}">Download <i>{{ fic.title }}</i></a></span> by {{ fic.author }} ({{ fic.format }}) {% endif %} {% if not fic.completed and not fic.failure %} - <span class="recent">Processing {{ fic.title }}</span> + <span class="recent">Processing <i>{{ fic.title }}</i></span> by {{ fic.author }} ({{ fic.format }}) {% endif %} {% if fic.failure %} diff --git a/status.html b/status.html index c65342de..5fd2b9de 100644 --- a/status.html +++ b/status.html @@ -47,7 +47,7 @@ <p> {% if fic.completed %} <p>Your fic has finished processing and you can download it now.</p> - <span class="recent"><a href="/file?id={{ fic.key }}">Download {{ fic.title }}</a></span> + <span class="recent"><a href="/file?id={{ fic.key }}">Download <i>{{ fic.title }}</i></a></span> by {{ fic.author }} ({{ fic.format }}) {% endif %} {% if fic.failure %} @@ -55,7 +55,7 @@ {% endif %} {% if not fic.completed and not fic.failure %} <p>Not done yet. This page will periodically poll to see if your story has finished.</p> - <span class="recent">Processing {{ fic.title }}</span> + <span class="recent">Processing <i>{{ fic.title }}</i></span> by {{ fic.author }} ({{ fic.format }}) {% endif %} <a href="{{ fic.url }}" title="Link to original story">Source</a>