From 1740e0da48e4e147583e6cf3a69ae25d49fa78eb Mon Sep 17 00:00:00 2001 From: sigizmund Date: Mon, 21 Dec 2009 13:12:47 +0000 Subject: [PATCH] Changes while moving to AJAX service --- adapter.py | 7 +++++++ downaloder.py | 13 ++++++++++--- ffa.py | 50 ++++++++++++++++++++++++++++++++++++++++++++------ ffnet.py | 4 ++-- output.py | 45 ++++++++++++++++++++++++--------------------- zipdir.py | 6 +++++- 6 files changed, 92 insertions(+), 33 deletions(-) diff --git a/adapter.py b/adapter.py index ff1cb8ef..bf37a017 100644 --- a/adapter.py +++ b/adapter.py @@ -1,3 +1,10 @@ +class LoginRequiredException(Exception): + def __init__(self, url): + self.url = url + + def __str__(self): + return repr(self.url + ' requires user to be logged in') + class FanfictionSiteAdapter: login = '' password = '' diff --git a/downaloder.py b/downaloder.py index e119f810..dee7a30d 100644 --- a/downaloder.py +++ b/downaloder.py @@ -12,13 +12,17 @@ import urlparse as up import BeautifulSoup as bs import htmlentitydefs as hdefs + import ffa import ffnet import ficwad import output +import adapter import fictionalley import hpfiction +import time + class FanficLoader: '''A controller class which handles the interaction between various specific downloaders and writers''' booksDirectory = "books" @@ -30,7 +34,10 @@ class FanficLoader: self.inmemory = inmemory self.compress = compress self.badLogin = False - + + def getAdapter(): + return self.adapter + def download(self): logging.debug("Trying to download the story") if self.adapter.requiresLogin(): @@ -38,7 +45,7 @@ class FanficLoader: if not self.adapter.performLogin(): logging.debug("Login/password problem") self.badLogin = True - return None + raise adapter.LoginRequiredException(self.adapter.url) urls = self.adapter.extractIndividualUrls() self.writer = self.writerClass(self.booksDirectory, self.adapter.getStoryName(), self.adapter.getAuthorName(), inmemory=self.inmemory, compress=self.compress) @@ -50,7 +57,7 @@ class FanficLoader: i = i+1 text = self.adapter.getText(u) self.writer.writeChapter(n, text) - + self.writer.finalise() if self.inmemory: diff --git a/ffa.py b/ffa.py index b8c307ec..ca81c6d5 100644 --- a/ffa.py +++ b/ffa.py @@ -39,13 +39,21 @@ class FFA(FanfictionSiteAdapter): def _getLoginScript(self): return self.path - def requiresLogin(self, url = None): - resp = self.opener.open(self.url) - data = resp.read() - if data.find('Please login to continue') != -1: + def reqLoginData(self, data): + if data.find('Please login to continue') != -1 or data.find('

Username or password not found. Please') != -1 or data.find("This story is rated Mature, you must be logged in to view it") != -1: return True else: return False + + def requiresLogin(self, url = None): + if url == None: + u = self.url + else: + u = url + + resp = self.opener.open(u) + data = resp.read() + return self.reqLoginData(data) def performLogin(self, url = None): if url == None: @@ -63,7 +71,9 @@ class FFA(FanfictionSiteAdapter): req = self.opener.open(loginUrl, urlvals) - if self.requiresLogin(): + d = req.read() + + if self.reqLoginData(d) : return False else: return True @@ -72,7 +82,16 @@ class FFA(FanfictionSiteAdapter): data = self.opener.open(self.url).read() soup = bs.BeautifulStoneSoup(data) - self.author = soup.find('a', {'href' : '/contact/'}).string + if self.reqLoginData(data): + logging.debug('Data requires login, trying to login') + if not self.performLogin(url): + logging.error('Cannot login, raising exception ... ') + raise LoginRequiredException(url) + else: + data = self.opener.open(url).read() + + + self.author = str(soup.find('a', {'href' : '/contact/'}).string) self.storyName = str(soup.find('h1', {'class' : 'textCenter'}).contents[0]).strip() logging.debug("Story `%s` by `%s`" % (self.storyName, self.author)) @@ -97,6 +116,14 @@ class FFA(FanfictionSiteAdapter): logging.info('Downloading: %s' % url) data = self.opener.open(url).read() + if self.reqLoginData(data): + logging.debug('Data requires login, trying to login') + if not self.performLogin(url): + logging.error('Cannot login, raising exception ... ') + raise LoginRequiredException(url) + else: + data = self.opener.open(url).read() + lines = data.split('\n') emit = False @@ -192,6 +219,17 @@ class FFA_UnitTests(unittest.TestCase): data = f.getText(url) seek = 'So Hokage-sama” I said, “this is how we came' self.assertTrue(data.find(seek) != -1) + + def testSemiLoginRequired(self): + f = FFA('http://viridian.fanficauthors.net/Harry_Potter_and_the_Nightmares_of_Futures_Past/The_End_of_Days/') + + urls = f.extractIndividualUrls() + + try: + data = f.getText('http://viridian.fanficauthors.net/Harry_Potter_and_the_Nightmares_of_Futures_Past/Doing_the_Mungo_Shuffle/') + self.assertTrue(False) + except LoginRequiredException, e: + self.assertTrue(True) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/ffnet.py b/ffnet.py index ada03a25..852c6097 100644 --- a/ffnet.py +++ b/ffnet.py @@ -92,10 +92,10 @@ class FFNet(FanfictionSiteAdapter): for l in lines: if l.find("»") != -1 and l.find('') != -1: s2 = bs.BeautifulStoneSoup(l) - self.storyName = s2.find('b').string + self.storyName = str(s2.find('b').string) elif l.find(" 0: continue diff --git a/output.py b/output.py index 36961468..aeea8cda 100644 --- a/output.py +++ b/output.py @@ -13,6 +13,7 @@ import os.path import zipfile import StringIO import logging +import hashlib import urllib as u import pprint as pp import urllib2 as u2 @@ -84,14 +85,20 @@ class EPubFanficWriter(FanficWriter): files = {} def _writeFile(self, fileName, data): + logging.debug('_writeFile(`%s`, data)' % fileName) if fileName in self.files: - self.files[fileName].write(data.decode('utf-8')) + try: + d = data.decode('utf-8') + except UnicodeEncodeError, e: + d = data + + self.files[fileName].write(d) else: if self.inmemory: self.files[fileName] = StringIO.StringIO() else: self.files[fileName] = open(self.directory + '/' + fileName, 'w') - + self._writeFile(fileName, data) @@ -127,10 +134,6 @@ class EPubFanficWriter(FanficWriter): os.mkdir(self.directory + '/META-INF') os.mkdir(self.directory + '/OEBPS') -# print >> codecs.open(self.directory + '/mimetype', 'w', 'utf-8'), MIMETYPE -# print >> codecs.open(self.directory + '/META-INF/container.xml', 'w', 'utf-8'), CONTAINER -# print >> codecs.open(self.directory + '/OEBPS/stylesheet.css', 'w', 'utf-8'), CSS - self._writeFile('mimetype', MIMETYPE) self._writeFile('META-INF/container.xml', CONTAINER) self._writeFile('OEBPS/stylesheet.css', CSS) @@ -145,7 +148,15 @@ class EPubFanficWriter(FanficWriter): return text def writeChapter(self, title, text): - fileName = base64.b64encode(title).replace('/', '_') + ".xhtml" + logging.debug("Writing chapter: %s" % title) + try: + fileName = base64.b64encode(title).replace('/', '_') + ".xhtml" + except UnicodeEncodeError, e: + fileName = base64.b64encode(title.encode('utf-8')).replace('/', '_') + ".xhtml" +# title = cgi.esca#title.decode('utf-8') +# sha = hashlib.sha224(title) +# fileName = sha.hexdigest() + ".xhtml" + #fileName = cgi.escape(title) + '.xhtml' filePath = self.directory + "/OEBPS/" + fileName fn = 'OEBPS/' + fileName @@ -154,7 +165,7 @@ class EPubFanficWriter(FanficWriter): text = self._removeEntities(text) - self.soup = bs.BeautifulStoneSoup(text) + self.soup = bs.BeautifulStoneSoup(text.decode('utf-8')) allTags = self.soup.findAll(recursive=True) for t in allTags: @@ -206,10 +217,12 @@ class EPubFanficWriter(FanficWriter): i = 0 for t,f in self.chapters: - chapterId = base64.b64encode(t) -# print >> toc, TOC_ITEM % (chapterId, i, cgi.escape(t), f) + try: + chapterId = base64.b64encode(t) + except UnicodeEncodeError, e: + chapterId = base64.b64encode(t.encode('utf-8')) + self._writeFile(tocFilePath, TOC_ITEM % (chapterId, i, cgi.escape(t), f)) -# print >> opf, CONTENT_ITEM % (chapterId, f) self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f)) ids.append(chapterId) @@ -218,24 +231,14 @@ class EPubFanficWriter(FanficWriter): # logging.d('Toc and refs printed, proceesing to ref-ids....') -# print >> toc, TOC_END -# print >> opf, CONTENT_END_MANIFEST - self._writeFile(tocFilePath, TOC_END) self._writeFile(opfFilePath, CONTENT_END_MANIFEST) for chapterId in ids: -# print >> opf, CONTENT_ITEMREF % chapterId self._writeFile(opfFilePath, CONTENT_ITEMREF % chapterId) -# print >> opf, CONTENT_END self._writeFile(opfFilePath, CONTENT_END) -# opf.close() -# toc.close() - -# print('Finished') - self._closeFiles() filename = self.directory + '.epub' diff --git a/zipdir.py b/zipdir.py index 0da2691d..a0a568e6 100644 --- a/zipdir.py +++ b/zipdir.py @@ -50,7 +50,11 @@ def inMemoryZip(files): data = files[path] # logging.debug(data) - memzip.writestr(path, data.encode('utf-8')) + logging.debug("Writing ZIP path %s" % path) + try: + memzip.writestr(path, data.encode('utf-8')) + except UnicodeDecodeError, e: + memzip.writestr(path.encode('utf-8'), data.encode('utf-8')) for zf in memzip.filelist: zf.create_system = 0