FanFicFare/fanficdownloader/zipdir.py

177 lines
5 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import with_statement
import sys
import os
import zlib
import zipfile
from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
from contextlib import closing
import logging
import BeautifulSoup as bs
import htmlentitydefs as hdefs
import time
import datetime
from datetime import timedelta
import StringIO
class InvalidEPub(Exception):
pass
def checkNewer(filename, curdte):
ret = True
if not os.path.isfile(filename):
logging.debug('File %s does not already exist.' % filename)
return ret
#logging.debug('filename=%s, curdte=%s' % (filename, curdte))
lastdate = None
with closing(ZipFile(open(filename, 'rb'))) as epub:
titleFilePath = "OEBPS/title_page.xhtml"
contentFilePath = "OEBPS/content.opf"
namelist = set(epub.namelist())
#logging.debug('namelist=%s' % namelist)
if 'mimetype' not in namelist or \
'META-INF/container.xml' not in namelist:
#raise InvalidEPub('%s: not a valid EPUB' % filename)
logging.debug('File %s is not a valid EPub format file.' % filename)
return ret
if contentFilePath not in namelist:
return ret # file is not newer
data = epub.read(contentFilePath)
soup = bs.BeautifulStoneSoup(data)
lstdte = soup.find ('dc:date', {'opf:event' : 'modification'})
#logging.debug('lstdte=%s' % lstdte.string)
if lstdte is None and titleFilePath in namelist:
data = epub.read(titleFilePath)
soup = bs.BeautifulStoneSoup(data)
fld = ''
allTDs = soup.findAll ('td')
for td in allTDs:
b = td.find ('b')
if b is not None:
fld = b.string
if td.string is not None and fld == "Updated:":
lastdate = td.string
#logging.debug('title lastdate=%s' % lastdate)
else:
lastdate = lstdte.string.strip(' ')
#logging.debug('contents lastdate=%s' % lastdate)
if lastdate is not None:
currUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(curdte.strftime('%Y-%m-%d'), "%Y-%m-%d")))
storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(lastdate, "%Y-%m-%d")))
logging.debug('File %s last update date is %s, comparing to %s' % (filename, storyUpdated, currUpdated))
if currUpdated <= storyUpdated :
ret = False
logging.debug("Does %s need to be updated? %s" % (filename, ret))
return ret
def toZip(filename, directory):
zippedHelp = zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED)
lst = os.listdir(directory)
for entity in lst:
if entity.startswith('.'):
continue
each = os.path.join(directory,entity)
print(each)
if os.path.isfile(each):
print(each)
# epub standard requires mimetype to be uncompressed and first file.
if entity == 'mimetype':
zippedHelp.write(each, arcname=entity, compress_type=zipfile.ZIP_STORED)
else:
zippedHelp.write(each, arcname=entity)
else:
addFolderToZip(zippedHelp,entity, each)
zippedHelp.close()
def addFolderToZip(zippedHelp,folder,fpath):
#print('addFolderToZip(%s)' % folder)
if folder == '.' or folder == '..':
return
folderFiles = os.listdir(fpath)
for f in folderFiles:
if os.path.isfile(fpath + '/' + f):
#print('basename=%s' % os.path.basename(fpath + '/' + f))
zippedHelp.write(fpath + '/' + f, folder + '/' + f, zipfile.ZIP_DEFLATED)
elif os.path.isdir(f):
addFolderToZip(zippedHelp,f)
def inMemoryZip(files):
# files have a structure of {'path/to/file' => content} dictionary
io = StringIO.StringIO()
if 'mimetype' in files:
# This fixes the uncompressed mimetype-first issue by opening
# the in memory file as STORE, putting in the mimetype, then
# closing and re-opening with DEFLATED. while it is often
# true that mimetype is the first file, we can't assume it,
# because the dict object is defined as unordered.
path='mimetype'
memzip = zipfile.ZipFile(io, 'a', compression=zipfile.ZIP_STORED)
memzip.debug = 3
if type(files[path]) != type('str'):
data = files[path].getvalue()
else:
data = files[path]
logging.debug("Writing ZIP path %s" % path)
try:
memzip.writestr(path, data.encode('utf-8'))
except UnicodeDecodeError, e:
memzip.writestr(path.encode('utf-8'), data.encode('utf-8'))
memzip.close()
# remove it from the files dict.
del(files['mimetype'])
# open in 'a' append mode.
memzip = zipfile.ZipFile(io, 'a', compression=zipfile.ZIP_DEFLATED)
memzip.debug = 3
for path in files:
if type(files[path]) != type('str'):
data = files[path].getvalue()
else:
data = files[path]
# logging.debug(data)
logging.debug("Writing ZIP path %s" % path)
try:
memzip.writestr(path, data.encode('utf-8'))
except UnicodeDecodeError, e:
memzip.writestr(path.encode('utf-8'), data.encode('utf-8'))
# declares all the files created by Windows.
for zf in memzip.filelist:
zf.create_system = 0
memzip.close()
return io
if __name__ == '__main__':
# toZip('sample.epub', "books/A_Time_To_Reflect")
# z = zipfile.ZipFile('sample.epub', 'r')
files = {'test.txt' : 'test', 'data/abc.txt' : 'abc'}
data = inMemoryZip(files)
f = open('res.zip', 'w')
f.write(data)
f.close()