mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
CLI downloader can update existing epubs, pull only metadata or chapter range.
epubmerge provides a lot of the grunt work for updates.
This commit is contained in:
parent
e202105248
commit
8bf22729fe
12 changed files with 367 additions and 177 deletions
2
app.yaml
2
app.yaml
|
|
@ -1,6 +1,6 @@
|
|||
# fanfictionloader ffd-retief
|
||||
application: fanfictionloader
|
||||
version: 4-0-2
|
||||
version: 4-0-3
|
||||
runtime: python
|
||||
api_version: 1
|
||||
|
||||
|
|
|
|||
|
|
@ -20,9 +20,12 @@ import logging
|
|||
logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
|
||||
|
||||
import sys, os
|
||||
from StringIO import StringIO
|
||||
from optparse import OptionParser
|
||||
import getpass
|
||||
|
||||
from epubmerge import doMerge
|
||||
|
||||
if sys.version_info < (2, 5):
|
||||
print "This program requires Python 2.5 or newer."
|
||||
sys.exit(1)
|
||||
|
|
@ -31,9 +34,9 @@ from fanficdownloader import adapters,writers,exceptions
|
|||
|
||||
import ConfigParser
|
||||
|
||||
def writeStory(config,adapter,writeformat,metaonly=False,outstream=None):
    '''
    Write the adapter's story in one output format.

    config = configuration object handed to the writer.
    adapter = site adapter for the story being written.
    writeformat = format name passed to writers.getWriter().
    metaonly if true, is forwarded to the writer so it skips fetching
             the chapter texts.
    outstream = stream forwarded to the writer; when None the writer
                writes to its own output file.
    '''
    writer = writers.getWriter(writeformat,config,adapter)
    writer.writeStory(outstream=outstream,metaonly=metaonly)
    # Drop the writer explicitly once the story has been written.
    del writer
|
||||
|
||||
def main():
|
||||
|
|
@ -41,25 +44,35 @@ def main():
|
|||
# read in args, anything starting with -- will be treated as --<variable>=<value>
|
||||
usage = "usage: %prog [options] storyurl"
|
||||
parser = OptionParser(usage)
|
||||
parser.add_option("-f", "--format", dest="format", default='epub',
|
||||
parser.add_option("-f", "--format", dest="format", default="epub",
|
||||
help="write story as FORMAT, epub(default), text or html", metavar="FORMAT")
|
||||
parser.add_option("-b", "--begin", dest="begin", default=None,
|
||||
help="Begin with Chapter START", metavar="START")
|
||||
parser.add_option("-e", "--end", dest="end", default=None,
|
||||
help="End with Chapter END", metavar="END")
|
||||
parser.add_option("-o", "--option",
|
||||
action="append", dest="options",
|
||||
help="set an option NAME=VALUE", metavar="NAME=VALUE")
|
||||
parser.add_option("-m", "--meta-only",
|
||||
action="store_true", dest="metaonly",
|
||||
help="Retrieve metadata and stop",)
|
||||
help="Retrieve metadata and stop. Write title_page only epub if epub.",)
|
||||
parser.add_option("-u", "--update-epub",
|
||||
action="store_true", dest="update",
|
||||
help="Update an existing epub with new chapter, give epub filename instead of storyurl. Not compatible with inserted TOC.",)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if len(args) != 1:
|
||||
parser.error("incorrect number of arguments")
|
||||
|
||||
if options.update and options.format != 'epub':
|
||||
parser.error("-u/--update-epub only works with epub")
|
||||
|
||||
config = ConfigParser.SafeConfigParser()
|
||||
|
||||
logging.debug('reading defaults.ini config file, if present')
|
||||
#logging.debug('reading defaults.ini config file, if present')
|
||||
config.read('defaults.ini')
|
||||
logging.debug('reading personal.ini config file, if present')
|
||||
#logging.debug('reading personal.ini config file, if present')
|
||||
config.read('personal.ini')
|
||||
|
||||
try:
|
||||
|
|
@ -72,7 +85,19 @@ def main():
|
|||
config.set("overrides",var,val)
|
||||
|
||||
try:
|
||||
adapter = adapters.getAdapter(config,args[0])
|
||||
## Attempt to update an existing epub.
|
||||
if options.update:
|
||||
updateio = StringIO()
|
||||
(url,chaptercount) = doMerge(updateio,
|
||||
args,
|
||||
titlenavpoints=False,
|
||||
striptitletoc=True,
|
||||
forceunique=False)
|
||||
print "Updating %s, URL: %s" % (args[0],url)
|
||||
else:
|
||||
url = args[0]
|
||||
|
||||
adapter = adapters.getAdapter(config,url)
|
||||
|
||||
try:
|
||||
adapter.getStoryMetadataOnly()
|
||||
|
|
@ -89,19 +114,63 @@ def main():
|
|||
adapter.is_adult=True
|
||||
adapter.getStoryMetadataOnly()
|
||||
|
||||
if options.update:
|
||||
urlchaptercount = int(adapter.getStoryMetadataOnly().getMetadata('numChapters'))
|
||||
|
||||
if chaptercount == urlchaptercount:
|
||||
print "%s already contains %d chapters." % (args[0],chaptercount)
|
||||
elif chaptercount > urlchaptercount:
|
||||
print "%s contains %d chapters, more than source: %d." % (args[0],chaptercount,urlchaptercount)
|
||||
else:
|
||||
print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)
|
||||
## Get updated title page/metadata by itself in an epub.
|
||||
## Even if the title page isn't included, this carries the metadata.
|
||||
titleio = StringIO()
|
||||
writeStory(config,adapter,"epub",metaonly=True,outstream=titleio)
|
||||
|
||||
## Go get the new chapters only in another epub.
|
||||
newchaptersio = StringIO()
|
||||
adapter.setChaptersRange(chaptercount+1,urlchaptercount)
|
||||
config.set("overrides",'include_tocpage','false')
|
||||
config.set("overrides",'include_titlepage','false')
|
||||
writeStory(config,adapter,"epub",outstream=newchaptersio)
|
||||
|
||||
# out = open("testing/titleio.epub","wb")
|
||||
# out.write(titleio.getvalue())
|
||||
# out.close()
|
||||
|
||||
# out = open("testing/updateio.epub","wb")
|
||||
# out.write(updateio.getvalue())
|
||||
# out.close()
|
||||
|
||||
# out = open("testing/newchaptersio.epub","wb")
|
||||
# out.write(newchaptersio.getvalue())
|
||||
# out.close()
|
||||
|
||||
## Merge the three epubs together.
|
||||
doMerge(args[0],
|
||||
[titleio,updateio,newchaptersio],
|
||||
fromfirst=True,
|
||||
titlenavpoints=False,
|
||||
striptitletoc=False,
|
||||
forceunique=False)
|
||||
|
||||
else:
|
||||
# regular download
|
||||
if options.metaonly:
|
||||
print adapter.getStoryMetadataOnly()
|
||||
return
|
||||
|
||||
adapter.setChaptersRange(options.begin,options.end)
|
||||
|
||||
if options.format == "all":
|
||||
## For testing. Doing all three formats actually causes
|
||||
## some interesting config issues with format-specific
|
||||
## sections. But it should rarely be an issue.
|
||||
writeStory(config,adapter,"epub")
|
||||
writeStory(config,adapter,"html")
|
||||
writeStory(config,adapter,"txt")
|
||||
writeStory(config,adapter,"epub",options.metaonly)
|
||||
writeStory(config,adapter,"html",options.metaonly)
|
||||
writeStory(config,adapter,"txt",options.metaonly)
|
||||
else:
|
||||
writeStory(config,adapter,options.format)
|
||||
writeStory(config,adapter,options.format,options.metaonly)
|
||||
|
||||
del adapter
|
||||
|
||||
|
|
|
|||
194
epubmerge.py
194
epubmerge.py
|
|
@ -18,73 +18,108 @@
|
|||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
import getopt
|
||||
import os
|
||||
import re
|
||||
#import StringIO
|
||||
from optparse import OptionParser
|
||||
|
||||
import zlib
|
||||
import zipfile
|
||||
from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
|
||||
from time import time
|
||||
|
||||
from exceptions import KeyError
|
||||
|
||||
from xml.dom.minidom import parse, parseString, getDOMImplementation
|
||||
|
||||
def usage():
    '''
    Print the epubmerge command line help text to stdout.

    Each entry is printed on its own line.  Single-argument print(...)
    calls are used so the function behaves the same under Python 2
    (parenthesized expression) and Python 3 (function call).
    '''
    helplines = [
        "epubmerge 1.0 Merges multiple epub format ebooks together",
        "\nUsage: " + sys.argv[0]+" [options] <input epub> [<input epub> ...]\n",
        " Options:",
        " -h --help",
        " -o <output file> --output=<output file> Default: merge.epub",
        " -t <output title> --title=<output title> Default: '<First Title> Anthology'",
        " -a <author name> --author=<author name> Default: <All authors from epubs>",
        " Multiple authors may be given.",
    ]
    for helpline in helplines:
        print(helpline)
|
||||
def main(argv):
    '''
    Command line entry point: parse options and merge the input epubs.

    argv = list of command line arguments without the program name
           (the module calls this with sys.argv[1:]).

    Exits through the option parser (SystemExit) on --help, on bad
    options, or when no input epub is given.
    '''
    # read in args, anything starting with -- will be treated as --<variable>=<value>
    usage = "usage: %prog [options] <input epub> [<input epub>...]"
    parser = OptionParser(usage)
    parser.add_option("-o", "--output", dest="outputopt", default="merge.epub",
                      help="Set OUTPUT file, Default: merge.epub", metavar="OUTPUT")
    parser.add_option("-t", "--title", dest="titleopt", default=None,
                      help="Use TITLE as the metadata title. Default: '<first epub title> Anthology'", metavar="TITLE")
    parser.add_option("-d", "--description", dest="descopt", default=None,
                      help="Use DESC as the metadata description. Default: '<epub title> by <author>' for each epub.", metavar="DESC")
    parser.add_option("-a", "--author",
                      action="append", dest="authoropts", default=[],
                      help="Use AUTHOR as a metadata author, multiple authors may be given, Default: <All authors from epubs>", metavar="AUTHOR")
    parser.add_option("-f", "--first",
                      action="store_true", dest="fromfirst", default=False,
                      help="Take all metadata from first input epub",)
    parser.add_option("-n", "--titles-in-toc",
                      action="store_true", dest="titlenavpoints",
                      help="Put an entry in the TOC for each epub, in addition to each epub's chapters.",)
    parser.add_option("-s", "--strip-title-toc",
                      action="store_true", dest="striptitletoc",
                      help="Strip any title_page.xhtml and toc_page.xhtml files.",)

    # Parse the arguments we were handed; calling parse_args() with no
    # argument would silently ignore argv and re-read sys.argv instead.
    (options, args) = parser.parse_args(argv)

    # The usage line requires at least one input epub; fail with a
    # clear message instead of erroring later inside the merge.
    if len(args) < 1:
        parser.error("no input epub files given")

    ## Add .epub if not already there.
    if not options.outputopt.lower().endswith(".epub"):
        options.outputopt=options.outputopt+".epub"

    print("output file: "+options.outputopt)

    # doMerge is given file names here; per its docstring it also
    # accepts StringIO objects for the output and for each input.
    doMerge(options.outputopt,
            args,
            options.authoropts,
            options.titleopt,
            options.descopt,
            options.fromfirst,
            options.titlenavpoints,
            options.striptitletoc)
|
||||
|
||||
def doMerge(outputio,files,authoropts=[],titleopt=None,descopt=None,
|
||||
fromfirst=False,
|
||||
titlenavpoints=True,
|
||||
striptitletoc=False,
|
||||
forceunique=True):
|
||||
'''
|
||||
outputio = output file name or StringIO.
|
||||
files = list of input file names or StringIOs.
|
||||
authoropts = list of authors to use, otherwise add from all input
|
||||
titleopt = title, otherwise '<first title> Anthology'
|
||||
descopt = description, otherwise '<title> by <author>' list for all input
|
||||
fromfirst if true, take all metadata (including author, title, desc) from first input
|
||||
titlenavpoints if true, put in a new TOC entry for each epub
|
||||
striptitletoc if true, strip out any (title|toc)_page.xhtml files
|
||||
forceunique if true, guarantee uniqueness of contents by adding a dir for each input
|
||||
'''
|
||||
## Python 2.5 ZipFile is rather more primitive than later
|
||||
## versions. It can operate on a file, or on a StringIO, but
|
||||
## not on an open stream. OTOH, I suspect we would have had
|
||||
## problems with closing and opening again to change the
|
||||
## compression type anyway.
|
||||
|
||||
filecount=0
|
||||
source=None
|
||||
|
||||
## Write mimetype file, must be first and uncompressed.
|
||||
## Older versions of python(2.4/5) don't allow you to specify
|
||||
## compression by individual file.
|
||||
## Overwrite if existing output file.
|
||||
outputepub = ZipFile(outputopt, "w", compression=ZIP_STORED)
|
||||
outputepub = ZipFile(outputio, "w", compression=ZIP_STORED)
|
||||
outputepub.debug = 3
|
||||
outputepub.writestr("mimetype", "application/epub+zip")
|
||||
outputepub.close()
|
||||
|
||||
## Re-open file for content.
|
||||
outputepub = ZipFile(outputopt, "a", compression=ZIP_DEFLATED)
|
||||
outputepub = ZipFile(outputio, "a", compression=ZIP_DEFLATED)
|
||||
outputepub.debug = 3
|
||||
|
||||
## Create META-INF/container.xml file. The only thing it does is
|
||||
|
|
@ -110,12 +145,20 @@ def main():
|
|||
booktitles = [] # list of strings -- Each book's title
|
||||
allauthors = [] # list of lists of strings -- Each book's list of authors.
|
||||
|
||||
booknum=1
|
||||
for filename in args:
|
||||
print "input file: "+filename
|
||||
book = "%d" % booknum
|
||||
filelist = []
|
||||
|
||||
epub = ZipFile(filename, 'r')
|
||||
booknum=1
|
||||
firstmetadom = None
|
||||
for file in files:
|
||||
book = "%d" % booknum
|
||||
bookdir = ""
|
||||
bookid = ""
|
||||
if forceunique:
|
||||
bookdir = "%d/" % booknum
|
||||
bookid = "a%d" % booknum
|
||||
#print "book %d" % booknum
|
||||
|
||||
epub = ZipFile(file, 'r')
|
||||
|
||||
## Find the .opf file.
|
||||
container = epub.read("META-INF/container.xml")
|
||||
|
|
@ -129,6 +172,10 @@ def main():
|
|||
relpath=relpath+"/"
|
||||
|
||||
metadom = parseString(epub.read(rootfilename))
|
||||
if booknum==1:
|
||||
firstmetadom = metadom.getElementsByTagName("metadata")[0]
|
||||
source=firstmetadom.getElementsByTagName("dc:source")[0].firstChild.data.encode("utf-8")
|
||||
#print "Source:%s"%source
|
||||
|
||||
## Save indiv book title
|
||||
booktitles.append(metadom.getElementsByTagName("dc:title")[0].firstChild.data)
|
||||
|
|
@ -147,22 +194,33 @@ def main():
|
|||
tocdom = parseString(epub.read(relpath+item.getAttribute("href")))
|
||||
|
||||
for navpoint in tocdom.getElementsByTagName("navPoint"):
|
||||
navpoint.setAttribute("id","a"+book+navpoint.getAttribute("id"))
|
||||
navpoint.setAttribute("id",bookid+navpoint.getAttribute("id"))
|
||||
|
||||
for content in tocdom.getElementsByTagName("content"):
|
||||
content.setAttribute("src",book+"/"+relpath+content.getAttribute("src"))
|
||||
content.setAttribute("src",bookdir+relpath+content.getAttribute("src"))
|
||||
|
||||
navmaps.append(tocdom.getElementsByTagName("navMap")[0])
|
||||
else:
|
||||
id="a"+book+item.getAttribute("id")
|
||||
href=book+"/"+relpath+item.getAttribute("href")
|
||||
id=bookid+item.getAttribute("id")
|
||||
href=bookdir+relpath+item.getAttribute("href")
|
||||
href=href.encode('utf8')
|
||||
items.append((id,href,item.getAttribute("media-type")))
|
||||
#print "href:"+href
|
||||
if not striptitletoc or not re.match(r'.*/(title|toc)_page\.xhtml',
|
||||
item.getAttribute("href")):
|
||||
if href not in filelist:
|
||||
try:
|
||||
outputepub.writestr(href,
|
||||
epub.read(relpath+item.getAttribute("href")))
|
||||
if re.match(r'.*/file\d+\.xhtml',href):
|
||||
filecount+=1
|
||||
items.append((id,href,item.getAttribute("media-type")))
|
||||
filelist.append(href)
|
||||
except KeyError, ke:
|
||||
pass # Skip missing files.
|
||||
|
||||
for itemref in metadom.getElementsByTagName("itemref"):
|
||||
itemrefs.append("a"+book+itemref.getAttribute("idref"))
|
||||
if not striptitletoc or not re.match(r'(title|toc)_page', itemref.getAttribute("idref")):
|
||||
itemrefs.append(bookid+itemref.getAttribute("idref"))
|
||||
|
||||
booknum=booknum+1;
|
||||
|
||||
|
|
@ -170,13 +228,19 @@ def main():
|
|||
uniqueid="epubmerge-uid-%d" % time() # real sophisticated uid scheme.
|
||||
contentdom = getDOMImplementation().createDocument(None, "package", None)
|
||||
package = contentdom.documentElement
|
||||
if fromfirst and firstmetadom:
|
||||
metadata = firstmetadom
|
||||
firstpackage = firstmetadom.parentNode
|
||||
package.setAttribute("version",firstpackage.getAttribute("version"))
|
||||
package.setAttribute("xmlns",firstpackage.getAttribute("xmlns"))
|
||||
package.setAttribute("unique-identifier",firstpackage.getAttribute("unique-identifier"))
|
||||
else:
|
||||
package.setAttribute("version","2.0")
|
||||
package.setAttribute("xmlns","http://www.idpf.org/2007/opf")
|
||||
package.setAttribute("unique-identifier","epubmerge-id")
|
||||
metadata=newTag(contentdom,"metadata",
|
||||
attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/",
|
||||
"xmlns:opf":"http://www.idpf.org/2007/opf"})
|
||||
package.appendChild(metadata)
|
||||
metadata.appendChild(newTag(contentdom,"dc:identifier",text=uniqueid,attrs={"id":"epubmerge-id"}))
|
||||
if( titleopt is None ):
|
||||
titleopt = booktitles[0]+" Anthology"
|
||||
|
|
@ -202,10 +266,15 @@ def main():
|
|||
metadata.appendChild(newTag(contentdom,"dc:rights",text="Copyrights as per source stories"))
|
||||
metadata.appendChild(newTag(contentdom,"dc:language",text="en"))
|
||||
|
||||
if not descopt:
|
||||
# created now, but not filled in until TOC generation to save loops.
|
||||
description = newTag(contentdom,"dc:description",text="Anthology containing:\n")
|
||||
else:
|
||||
description = newTag(contentdom,"dc:description",text=descopt)
|
||||
metadata.appendChild(description)
|
||||
|
||||
package.appendChild(metadata)
|
||||
|
||||
manifest = contentdom.createElement("manifest")
|
||||
package.appendChild(manifest)
|
||||
for item in items:
|
||||
|
|
@ -245,23 +314,28 @@ def main():
|
|||
tocnavMap = tocncxdom.createElement("navMap")
|
||||
ncx.appendChild(tocnavMap)
|
||||
|
||||
## TOC navPoints can ge nested, but this flattens them for
|
||||
## TOC navPoints can be nested, but this flattens them for
|
||||
## simplicity, plus adds a navPoint for each epub.
|
||||
booknum=0
|
||||
for navmap in navmaps:
|
||||
navpoints = navmap.getElementsByTagName("navPoint")
|
||||
if titlenavpoints:
|
||||
## Copy first navPoint of each epub, give a different id and
|
||||
## text: bookname by authorname
|
||||
newnav = navpoints[0].cloneNode(True)
|
||||
newnav.setAttribute("id","book"+newnav.getAttribute("id"))
|
||||
## For purposes of TOC titling & desc, use first book author
|
||||
newtext = newTag(tocncxdom,"text",text=booktitles[booknum]+" by "+allauthors[booknum][0])
|
||||
description.appendChild(contentdom.createTextNode(booktitles[booknum]+" by "+allauthors[booknum][0]+"\n"))
|
||||
text = newnav.getElementsByTagName("text")[0]
|
||||
text.parentNode.replaceChild(newtext,text)
|
||||
tocnavMap.appendChild(newnav)
|
||||
|
||||
if not descopt and not fromfirst:
|
||||
description.appendChild(contentdom.createTextNode(booktitles[booknum]+" by "+allauthors[booknum][0]+"\n"))
|
||||
|
||||
for navpoint in navpoints:
|
||||
#print "navpoint:%s"%navpoint.getAttribute("id")
|
||||
if not striptitletoc or not re.match(r'(title|toc)_page',navpoint.getAttribute("id")):
|
||||
tocnavMap.appendChild(navpoint)
|
||||
booknum=booknum+1;
|
||||
|
||||
|
|
@ -283,6 +357,8 @@ def main():
|
|||
zf.create_system = 0
|
||||
outputepub.close()
|
||||
|
||||
return (source,filecount)
|
||||
|
||||
## Utility method for creating new tags.
|
||||
def newTag(dom,name,attrs=None,text=None):
|
||||
tag = dom.createElement(name)
|
||||
|
|
@ -294,4 +370,4 @@ def newTag(dom,name,attrs=None,text=None):
|
|||
return tag
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main(sys.argv[1:])
|
||||
|
|
|
|||
|
|
@ -75,7 +75,6 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
|
|||
self.story.setMetadata('dateUpdated',datetime.datetime.now())
|
||||
else:
|
||||
self.story.setMetadata('dateUpdated',makeDate("1975-01-31","%Y-%m-%d"))
|
||||
self.story.setMetadata('numChapters','5')
|
||||
self.story.setMetadata('numWords','123456')
|
||||
self.story.setMetadata('status','In-Completed')
|
||||
self.story.setMetadata('rating','Tweenie')
|
||||
|
|
@ -99,17 +98,39 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
|
|||
('Chapter 1, Xenos on Cinnabar',self.url+"&chapter=2"),
|
||||
('Chapter 2, Sinmay on Kintikin',self.url+"&chapter=3"),
|
||||
('Chapter 3, Over Cinnabar',self.url+"&chapter=4"),
|
||||
('Epilogue',self.url+"&chapter=5")]
|
||||
('Chapter 4',self.url+"&chapter=5"),
|
||||
('Chapter 5',self.url+"&chapter=6"),
|
||||
('Chapter 6',self.url+"&chapter=6"),
|
||||
('Chapter 7',self.url+"&chapter=6"),
|
||||
('Chapter 8',self.url+"&chapter=6"),
|
||||
('Chapter 9',self.url+"&chapter=6"),
|
||||
('Chapter 0',self.url+"&chapter=6"),
|
||||
('Chapter a',self.url+"&chapter=6"),
|
||||
('Chapter b',self.url+"&chapter=6"),
|
||||
('Chapter c',self.url+"&chapter=6"),
|
||||
('Chapter d',self.url+"&chapter=6"),
|
||||
('Chapter e',self.url+"&chapter=6"),
|
||||
('Chapter f',self.url+"&chapter=6"),
|
||||
('Chapter g',self.url+"&chapter=6"),
|
||||
('Chapter h',self.url+"&chapter=6"),
|
||||
('Chapter i',self.url+"&chapter=6"),
|
||||
('Chapter j',self.url+"&chapter=6"),
|
||||
('Chapter k',self.url+"&chapter=6"),
|
||||
('Chapter l',self.url+"&chapter=6"),
|
||||
('Chapter m',self.url+"&chapter=6"),
|
||||
('Chapter n',self.url+"&chapter=6"),
|
||||
]
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
if self.story.getMetadata('storyId') == '667':
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url)
|
||||
|
||||
if self.story.getMetadata('storyId') == '670':
|
||||
time.sleep(2.0)
|
||||
|
||||
|
||||
if "chapter=1" in url :
|
||||
text=u'''
|
||||
<div>
|
||||
|
|
|
|||
|
|
@ -69,6 +69,8 @@ class BaseSiteAdapter(Configurable):
|
|||
self.story.setMetadata('site',self.getSiteDomain())
|
||||
self.story.setMetadata('dateCreated',datetime.datetime.now())
|
||||
self.chapterUrls = [] # tuples of (chapter title,chapter url)
|
||||
self.chapterFirst = None
|
||||
self.chapterLast = None
|
||||
## order of preference for decoding.
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252"] # 1252 is a superset of
|
||||
|
|
@ -136,11 +138,23 @@ class BaseSiteAdapter(Configurable):
|
|||
logging.exception(excpt)
|
||||
raise(excpt)
|
||||
|
||||
# Limit chapters to download. Input starts at 1, list starts at 0
|
||||
def setChaptersRange(self,first=None,last=None):
    '''
    Limit which chapters are downloaded.

    first = 1-based number of the first chapter to fetch.
    last = 1-based number of the last chapter to fetch.

    Values may be ints or numeric strings; they are stored as 0-based
    list indexes in self.chapterFirst / self.chapterLast.  A bound
    that is None (or otherwise false) is left unchanged.
    '''
    # Input starts at 1, list starts at 0.
    if first:
        self.chapterFirst=int(first)-1
    if last:
        self.chapterLast=int(last)-1
|
||||
|
||||
# Does the download the first time it's called.
|
||||
def getStory(self):
|
||||
if not self.storyDone:
|
||||
self.getStoryMetadataOnly()
|
||||
for (title,url) in self.chapterUrls:
|
||||
for index, (title,url) in enumerate(self.chapterUrls):
|
||||
if (self.chapterFirst!=None and index < self.chapterFirst) or \
|
||||
(self.chapterLast!=None and index > self.chapterLast):
|
||||
self.story.addChapter(removeEntities(title),
|
||||
None)
|
||||
else:
|
||||
self.story.addChapter(removeEntities(title),
|
||||
removeEntities(self.getChapterText(url)))
|
||||
self.storyDone = True
|
||||
|
|
|
|||
|
|
@ -172,19 +172,21 @@ class BaseStoryWriter(Configurable):
|
|||
names as Story.metadata, but ENTRY should use index and chapter.
|
||||
"""
|
||||
# Only do TOC if there's more than one chapter and it's configured.
|
||||
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage"):
|
||||
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
|
||||
self._write(out,START.substitute(self.story.metadata))
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
|
||||
|
||||
self._write(out,END.substitute(self.story.metadata))
|
||||
|
||||
# if no outstream is given, write to file.
|
||||
def writeStory(self,outstream=None):
|
||||
def writeStory(self,outstream=None,metaonly=False):
|
||||
for tag in self.getConfigList("extratags"):
|
||||
self.story.addToList("extratags",tag)
|
||||
|
||||
self.metaonly = metaonly
|
||||
outfilename=self.getOutputFileName()
|
||||
|
||||
if not outstream:
|
||||
|
|
@ -207,22 +209,24 @@ class BaseStoryWriter(Configurable):
|
|||
if fileupdated > lastupdated:
|
||||
print "File(%s) Updated(%s) more recently than Story(%s) - Skipping" % (outfilename,fileupdated,lastupdated)
|
||||
return
|
||||
|
||||
self.story = self.adapter.getStory() # get full story now,
|
||||
# just before
|
||||
# writing. Fetch
|
||||
# before opening
|
||||
# file.
|
||||
if not metaonly:
|
||||
self.story = self.adapter.getStory() # get full story
|
||||
# now, just
|
||||
# before writing.
|
||||
# Fetch before
|
||||
# opening file.
|
||||
outstream = open(outfilename,"wb")
|
||||
else:
|
||||
close=False
|
||||
logging.debug("Save to stream")
|
||||
|
||||
if not metaonly:
|
||||
self.story = self.adapter.getStory() # get full story now,
|
||||
# just before writing.
|
||||
# Okay if double called
|
||||
# with above, it will
|
||||
# only fetch once.
|
||||
# just before
|
||||
# writing. Okay if
|
||||
# double called with
|
||||
# above, it will only
|
||||
# fetch once.
|
||||
if self.getConfig('zip_output'):
|
||||
out = StringIO.StringIO()
|
||||
self.writeStoryImpl(out)
|
||||
|
|
|
|||
|
|
@ -292,10 +292,11 @@ h6 { text-align: center; }
|
|||
if self.getConfig("include_titlepage"):
|
||||
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
|
||||
itemrefs.append("title_page")
|
||||
if self.getConfig("include_tocpage"):
|
||||
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
|
||||
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
|
||||
itemrefs.append("toc_page")
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
i=index+1
|
||||
items.append(("file%04d"%i,
|
||||
"OEBPS/file%04d.xhtml"%i,
|
||||
|
|
@ -407,11 +408,13 @@ h6 { text-align: center; }
|
|||
tocpageIO.close()
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
logging.debug('Writing chapter text for: %s' % title)
|
||||
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
|
||||
# ffnet(& maybe others) gives the whole chapter text as
|
||||
# one line. This causes problems for nook(at least) when
|
||||
# the chapter size starts getting big (200k+)
|
||||
# ffnet(& maybe others) gives the whole chapter text
|
||||
# as one line. This causes problems for nook(at
|
||||
# least) when the chapter size starts getting big
|
||||
# (200k+)
|
||||
fullhtml = fullhtml.replace('</p>','</p>\n').replace('<br />','<br />\n')
|
||||
outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8'))
|
||||
del fullhtml
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ class HTMLWriter(BaseStoryWriter):
|
|||
self.HTML_TOC_PAGE_END)
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
logging.debug('Writing chapter text for: %s' % title)
|
||||
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
|
||||
self._write(out,html)
|
||||
|
|
|
|||
|
|
@ -166,11 +166,13 @@ class MobiWriter(BaseStoryWriter):
|
|||
# tocpageIO.close()
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
logging.debug('Writing chapter text for: %s' % title)
|
||||
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
|
||||
# ffnet(& maybe others) gives the whole chapter text as
|
||||
# one line. This causes problems for nook(at least) when
|
||||
# the chapter size starts getting big (200k+)
|
||||
# ffnet(& maybe others) gives the whole chapter text
|
||||
# as one line. This causes problems for nook(at
|
||||
# least) when the chapter size starts getting big
|
||||
# (200k+)
|
||||
fullhtml = fullhtml.replace('</p>','</p>\n').replace('<br />','<br />\n')
|
||||
files.append(fullhtml.encode('utf-8'))
|
||||
del fullhtml
|
||||
|
|
|
|||
|
|
@ -128,9 +128,9 @@ End file.
|
|||
self._write(out,self.lineends(self.wraplines(towrap)))
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
logging.debug('Writing chapter text for: %s' % title)
|
||||
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))
|
||||
|
||||
self._write(out,self.lineends(html2text(html)))
|
||||
|
||||
self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.metadata))))
|
||||
|
|
|
|||
|
|
@ -50,11 +50,11 @@
|
|||
{% for fic in fics %}
|
||||
<p>
|
||||
{% if fic.completed %}
|
||||
<span class="recent"><a href="/file?id={{ fic.key }}">Download {{ fic.title }}</a></span>
|
||||
<span class="recent"><a href="/file?id={{ fic.key }}">Download <i>{{ fic.title }}</i></a></span>
|
||||
by {{ fic.author }} ({{ fic.format }})
|
||||
{% endif %}
|
||||
{% if not fic.completed and not fic.failure %}
|
||||
<span class="recent">Processing {{ fic.title }}</span>
|
||||
<span class="recent">Processing <i>{{ fic.title }}</i></span>
|
||||
by {{ fic.author }} ({{ fic.format }})
|
||||
{% endif %}
|
||||
{% if fic.failure %}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@
|
|||
<p>
|
||||
{% if fic.completed %}
|
||||
<p>Your fic has finished processing and you can download it now.</p>
|
||||
<span class="recent"><a href="/file?id={{ fic.key }}">Download {{ fic.title }}</a></span>
|
||||
<span class="recent"><a href="/file?id={{ fic.key }}">Download <i>{{ fic.title }}</i></a></span>
|
||||
by {{ fic.author }} ({{ fic.format }})
|
||||
{% endif %}
|
||||
{% if fic.failure %}
|
||||
|
|
@ -55,7 +55,7 @@
|
|||
{% endif %}
|
||||
{% if not fic.completed and not fic.failure %}
|
||||
<p>Not done yet. This page will periodically poll to see if your story has finished.</p>
|
||||
<span class="recent">Processing {{ fic.title }}</span>
|
||||
<span class="recent">Processing <i>{{ fic.title }}</i></span>
|
||||
by {{ fic.author }} ({{ fic.format }})
|
||||
{% endif %}
|
||||
<a href="{{ fic.url }}" title="Link to original story">Source</a>
|
||||
|
|
|
|||
Loading…
Reference in a new issue