mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-04-29 18:35:13 +02:00
177 lines
5 KiB
Python
177 lines
5 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
from __future__ import with_statement
|
|
|
|
import sys
|
|
import os
|
|
import zlib
|
|
import zipfile
|
|
from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
|
|
from contextlib import closing
|
|
import logging
|
|
|
|
import BeautifulSoup as bs
|
|
import htmlentitydefs as hdefs
|
|
import time
|
|
import datetime
|
|
from datetime import timedelta
|
|
|
|
import StringIO
|
|
|
|
class InvalidEPub(Exception):
|
|
pass
|
|
|
|
def checkNewer(filename, curdte):
|
|
ret = True
|
|
|
|
if not os.path.isfile(filename):
|
|
logging.debug('File %s does not already exist.' % filename)
|
|
return ret
|
|
|
|
#logging.debug('filename=%s, curdte=%s' % (filename, curdte))
|
|
lastdate = None
|
|
with closing(ZipFile(open(filename, 'rb'))) as epub:
|
|
titleFilePath = "OEBPS/title_page.xhtml"
|
|
contentFilePath = "OEBPS/content.opf"
|
|
|
|
namelist = set(epub.namelist())
|
|
#logging.debug('namelist=%s' % namelist)
|
|
if 'mimetype' not in namelist or \
|
|
'META-INF/container.xml' not in namelist:
|
|
#raise InvalidEPub('%s: not a valid EPUB' % filename)
|
|
logging.debug('File %s is not a valid EPub format file.' % filename)
|
|
return ret
|
|
|
|
if contentFilePath not in namelist:
|
|
return ret # file is not newer
|
|
|
|
data = epub.read(contentFilePath)
|
|
soup = bs.BeautifulStoneSoup(data)
|
|
lstdte = soup.find ('dc:date', {'opf:event' : 'modification'})
|
|
#logging.debug('lstdte=%s' % lstdte.string)
|
|
if lstdte is None and titleFilePath in namelist:
|
|
data = epub.read(titleFilePath)
|
|
soup = bs.BeautifulStoneSoup(data)
|
|
fld = ''
|
|
allTDs = soup.findAll ('td')
|
|
for td in allTDs:
|
|
b = td.find ('b')
|
|
if b is not None:
|
|
fld = b.string
|
|
if td.string is not None and fld == "Updated:":
|
|
lastdate = td.string
|
|
#logging.debug('title lastdate=%s' % lastdate)
|
|
else:
|
|
lastdate = lstdte.string.strip(' ')
|
|
#logging.debug('contents lastdate=%s' % lastdate)
|
|
|
|
if lastdate is not None:
|
|
currUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(curdte.strftime('%Y-%m-%d'), "%Y-%m-%d")))
|
|
storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(lastdate, "%Y-%m-%d")))
|
|
logging.debug('File %s last update date is %s, comparing to %s' % (filename, storyUpdated, currUpdated))
|
|
if currUpdated <= storyUpdated :
|
|
ret = False
|
|
|
|
logging.debug("Does %s need to be updated? %s" % (filename, ret))
|
|
return ret
|
|
|
|
|
|
def toZip(filename, directory):
|
|
zippedHelp = zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED)
|
|
lst = os.listdir(directory)
|
|
|
|
for entity in lst:
|
|
if entity.startswith('.'):
|
|
continue
|
|
|
|
each = os.path.join(directory,entity)
|
|
print(each)
|
|
|
|
if os.path.isfile(each):
|
|
print(each)
|
|
# epub standard requires mimetype to be uncompressed and first file.
|
|
if entity == 'mimetype':
|
|
zippedHelp.write(each, arcname=entity, compress_type=zipfile.ZIP_STORED)
|
|
else:
|
|
zippedHelp.write(each, arcname=entity)
|
|
else:
|
|
addFolderToZip(zippedHelp,entity, each)
|
|
|
|
zippedHelp.close()
|
|
|
|
def addFolderToZip(zippedHelp,folder,fpath):
|
|
#print('addFolderToZip(%s)' % folder)
|
|
|
|
if folder == '.' or folder == '..':
|
|
return
|
|
|
|
folderFiles = os.listdir(fpath)
|
|
for f in folderFiles:
|
|
if os.path.isfile(fpath + '/' + f):
|
|
#print('basename=%s' % os.path.basename(fpath + '/' + f))
|
|
zippedHelp.write(fpath + '/' + f, folder + '/' + f, zipfile.ZIP_DEFLATED)
|
|
elif os.path.isdir(f):
|
|
addFolderToZip(zippedHelp,f)
|
|
|
|
def inMemoryZip(files):
|
|
# files have a structure of {'path/to/file' => content} dictionary
|
|
io = StringIO.StringIO()
|
|
|
|
if 'mimetype' in files:
|
|
# This fixes the uncompressed mimetype-first issue by opening
|
|
# the in memory file as STORE, putting in the mimetype, then
|
|
# closing and re-opening with DEFLATED. while it is often
|
|
# true that mimetype is the first file, we can't assume it,
|
|
# because the dict object is defined as unordered.
|
|
path='mimetype'
|
|
memzip = zipfile.ZipFile(io, 'a', compression=zipfile.ZIP_STORED)
|
|
memzip.debug = 3
|
|
if type(files[path]) != type('str'):
|
|
data = files[path].getvalue()
|
|
else:
|
|
data = files[path]
|
|
|
|
logging.debug("Writing ZIP path %s" % path)
|
|
try:
|
|
memzip.writestr(path, data.encode('utf-8'))
|
|
except UnicodeDecodeError, e:
|
|
memzip.writestr(path.encode('utf-8'), data.encode('utf-8'))
|
|
|
|
memzip.close()
|
|
|
|
# remove it from the files dict.
|
|
del(files['mimetype'])
|
|
|
|
# open in 'a' append mode.
|
|
memzip = zipfile.ZipFile(io, 'a', compression=zipfile.ZIP_DEFLATED)
|
|
memzip.debug = 3
|
|
|
|
for path in files:
|
|
if type(files[path]) != type('str'):
|
|
data = files[path].getvalue()
|
|
else:
|
|
data = files[path]
|
|
|
|
# logging.debug(data)
|
|
logging.debug("Writing ZIP path %s" % path)
|
|
try:
|
|
memzip.writestr(path, data.encode('utf-8'))
|
|
except UnicodeDecodeError, e:
|
|
memzip.writestr(path.encode('utf-8'), data.encode('utf-8'))
|
|
|
|
# declares all the files created by Windows.
|
|
for zf in memzip.filelist:
|
|
zf.create_system = 0
|
|
|
|
memzip.close()
|
|
|
|
return io
|
|
|
|
if __name__ == '__main__':
|
|
# toZip('sample.epub', "books/A_Time_To_Reflect")
|
|
# z = zipfile.ZipFile('sample.epub', 'r')
|
|
files = {'test.txt' : 'test', 'data/abc.txt' : 'abc'}
|
|
data = inMemoryZip(files)
|
|
f = open('res.zip', 'w')
|
|
f.write(data)
|
|
f.close()
|