Changes while moving to AJAX service

This commit is contained in:
sigizmund 2009-12-21 13:12:47 +00:00
parent ed9e2992c0
commit 1740e0da48
6 changed files with 92 additions and 33 deletions

View file

@ -1,3 +1,10 @@
class LoginRequiredException(Exception):
    """Raised when a page cannot be fetched without logging in first.

    Attributes:
        url: the address that demanded authentication.
    """

    def __init__(self, url):
        # Pass the URL to the base class so ``args`` is populated; without
        # this, ``e.args`` is empty and the URL is lost from repr()/pickling.
        Exception.__init__(self, url)
        self.url = url

    def __str__(self):
        # %-formatting instead of ``+`` so a non-string url (e.g. None)
        # cannot raise TypeError while the error is being reported.
        # The repr() wrapping is kept so the text matches the previous output.
        return repr('%s requires user to be logged in' % (self.url,))
class FanfictionSiteAdapter:
login = ''
password = ''

View file

@ -12,13 +12,17 @@ import urlparse as up
import BeautifulSoup as bs
import htmlentitydefs as hdefs
import ffa
import ffnet
import ficwad
import output
import adapter
import fictionalley
import hpfiction
import time
class FanficLoader:
'''A controller class which handles the interaction between various specific downloaders and writers'''
booksDirectory = "books"
@ -30,7 +34,10 @@ class FanficLoader:
self.inmemory = inmemory
self.compress = compress
self.badLogin = False
def getAdapter(self):
    """Return the site adapter this loader is working with.

    The original signature omitted ``self``, so any call on an instance
    raised TypeError before the body could run.
    """
    return self.adapter
def download(self):
logging.debug("Trying to download the story")
if self.adapter.requiresLogin():
@ -38,7 +45,7 @@ class FanficLoader:
if not self.adapter.performLogin():
logging.debug("Login/password problem")
self.badLogin = True
return None
raise adapter.LoginRequiredException(self.adapter.url)
urls = self.adapter.extractIndividualUrls()
self.writer = self.writerClass(self.booksDirectory, self.adapter.getStoryName(), self.adapter.getAuthorName(), inmemory=self.inmemory, compress=self.compress)
@ -50,7 +57,7 @@ class FanficLoader:
i = i+1
text = self.adapter.getText(u)
self.writer.writeChapter(n, text)
self.writer.finalise()
if self.inmemory:

50
ffa.py
View file

@ -39,13 +39,21 @@ class FFA(FanfictionSiteAdapter):
# Return the value used as the login script location; this adapter simply
# hands back self.path (presumably the path of the story URL -- confirm
# against the constructor, which is not visible here).
def _getLoginScript(self):
return self.path
def requiresLogin(self, url = None):
resp = self.opener.open(self.url)
data = resp.read()
if data.find('<legend>Please login to continue</legend>') != -1:
def reqLoginData(self, data):
    """Tell whether already-fetched page content is a login prompt.

    data -- page text to inspect.

    Returns True when any of the known "please log in" / "login failed" /
    "mature content" markers occurs in ``data``, otherwise False.
    """
    markers = (
        '<legend>Please login to continue</legend>',
        '<h4>Username or password not found. Please',
        "This story is rated Mature, you must be logged in to view it",
    )
    # Substring membership replaces the chained ``find(...) != -1`` tests
    # and the redundant ``if ...: return True / else: return False``.
    for marker in markers:
        if marker in data:
            return True
    return False
def requiresLogin(self, url = None):
    """Fetch a page and report whether it asks the user to log in.

    url -- address to probe; when omitted (None), self.url is used.

    Returns whatever self.reqLoginData() says about the fetched content.
    """
    # Identity test against None rather than ``==``, which would invoke a
    # custom __eq__ on the url object.
    target = self.url if url is None else url
    page = self.opener.open(target).read()
    return self.reqLoginData(page)
def performLogin(self, url = None):
if url == None:
@ -63,7 +71,9 @@ class FFA(FanfictionSiteAdapter):
req = self.opener.open(loginUrl, urlvals)
if self.requiresLogin():
d = req.read()
if self.reqLoginData(d) :
return False
else:
return True
@ -72,7 +82,16 @@ class FFA(FanfictionSiteAdapter):
data = self.opener.open(self.url).read()
soup = bs.BeautifulStoneSoup(data)
self.author = soup.find('a', {'href' : '/contact/'}).string
if self.reqLoginData(data):
logging.debug('Data requires login, trying to login')
if not self.performLogin(url):
logging.error('Cannot login, raising exception ... ')
raise LoginRequiredException(url)
else:
data = self.opener.open(url).read()
self.author = str(soup.find('a', {'href' : '/contact/'}).string)
self.storyName = str(soup.find('h1', {'class' : 'textCenter'}).contents[0]).strip()
logging.debug("Story `%s` by `%s`" % (self.storyName, self.author))
@ -97,6 +116,14 @@ class FFA(FanfictionSiteAdapter):
logging.info('Downloading: %s' % url)
data = self.opener.open(url).read()
if self.reqLoginData(data):
logging.debug('Data requires login, trying to login')
if not self.performLogin(url):
logging.error('Cannot login, raising exception ... ')
raise LoginRequiredException(url)
else:
data = self.opener.open(url).read()
lines = data.split('\n')
emit = False
@ -192,6 +219,17 @@ class FFA_UnitTests(unittest.TestCase):
data = f.getText(url)
seek = 'So Hokage-sama” I said, “this is how we came'
self.assertTrue(data.find(seek) != -1)
def testSemiLoginRequired(self):
    """getText() on the second chapter URL must raise LoginRequiredException.

    The adapter is created for one chapter URL of the story and getText()
    is then asked for a different chapter; the test passes only when that
    call raises LoginRequiredException (presumably because that chapter is
    login-restricted on the site -- this needs network access to verify).
    """
    f = FFA('http://viridian.fanficauthors.net/Harry_Potter_and_the_Nightmares_of_Futures_Past/The_End_of_Days/')
    # The returned list is not needed; the call is kept because the
    # original test performed it before requesting the chapter text.
    f.extractIndividualUrls()
    # assertRaises replaces the try / assertTrue(False) / except pattern and
    # avoids the Python-2-only ``except X, e`` syntax.
    self.assertRaises(
        LoginRequiredException,
        f.getText,
        'http://viridian.fanficauthors.net/Harry_Potter_and_the_Nightmares_of_Futures_Past/Doing_the_Mungo_Shuffle/')
# Run this module's unit tests when the file is executed directly as a script.
if __name__ == '__main__':
unittest.main()

View file

@ -92,10 +92,10 @@ class FFNet(FanfictionSiteAdapter):
for l in lines:
if l.find("&#187;") != -1 and l.find('<b>') != -1:
s2 = bs.BeautifulStoneSoup(l)
self.storyName = s2.find('b').string
self.storyName = str(s2.find('b').string)
elif l.find("<a href='/u/") != -1:
s2 = bs.BeautifulStoneSoup(l)
self.authorName = s2.a.string
self.authorName = str(s2.a.string)
elif l.find("<SELECT title='chapter navigation'") != -1:
if len(urls) > 0:
continue

View file

@ -13,6 +13,7 @@ import os.path
import zipfile
import StringIO
import logging
import hashlib
import urllib as u
import pprint as pp
import urllib2 as u2
@ -84,14 +85,20 @@ class EPubFanficWriter(FanficWriter):
files = {}
def _writeFile(self, fileName, data):
logging.debug('_writeFile(`%s`, data)' % fileName)
if fileName in self.files:
self.files[fileName].write(data.decode('utf-8'))
try:
d = data.decode('utf-8')
except UnicodeEncodeError, e:
d = data
self.files[fileName].write(d)
else:
if self.inmemory:
self.files[fileName] = StringIO.StringIO()
else:
self.files[fileName] = open(self.directory + '/' + fileName, 'w')
self._writeFile(fileName, data)
@ -127,10 +134,6 @@ class EPubFanficWriter(FanficWriter):
os.mkdir(self.directory + '/META-INF')
os.mkdir(self.directory + '/OEBPS')
# print >> codecs.open(self.directory + '/mimetype', 'w', 'utf-8'), MIMETYPE
# print >> codecs.open(self.directory + '/META-INF/container.xml', 'w', 'utf-8'), CONTAINER
# print >> codecs.open(self.directory + '/OEBPS/stylesheet.css', 'w', 'utf-8'), CSS
self._writeFile('mimetype', MIMETYPE)
self._writeFile('META-INF/container.xml', CONTAINER)
self._writeFile('OEBPS/stylesheet.css', CSS)
@ -145,7 +148,15 @@ class EPubFanficWriter(FanficWriter):
return text
def writeChapter(self, title, text):
fileName = base64.b64encode(title).replace('/', '_') + ".xhtml"
logging.debug("Writing chapter: %s" % title)
try:
fileName = base64.b64encode(title).replace('/', '_') + ".xhtml"
except UnicodeEncodeError, e:
fileName = base64.b64encode(title.encode('utf-8')).replace('/', '_') + ".xhtml"
# title = cgi.esca#title.decode('utf-8')
# sha = hashlib.sha224(title)
# fileName = sha.hexdigest() + ".xhtml"
#fileName = cgi.escape(title) + '.xhtml'
filePath = self.directory + "/OEBPS/" + fileName
fn = 'OEBPS/' + fileName
@ -154,7 +165,7 @@ class EPubFanficWriter(FanficWriter):
text = self._removeEntities(text)
self.soup = bs.BeautifulStoneSoup(text)
self.soup = bs.BeautifulStoneSoup(text.decode('utf-8'))
allTags = self.soup.findAll(recursive=True)
for t in allTags:
@ -206,10 +217,12 @@ class EPubFanficWriter(FanficWriter):
i = 0
for t,f in self.chapters:
chapterId = base64.b64encode(t)
# print >> toc, TOC_ITEM % (chapterId, i, cgi.escape(t), f)
try:
chapterId = base64.b64encode(t)
except UnicodeEncodeError, e:
chapterId = base64.b64encode(t.encode('utf-8'))
self._writeFile(tocFilePath, TOC_ITEM % (chapterId, i, cgi.escape(t), f))
# print >> opf, CONTENT_ITEM % (chapterId, f)
self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f))
ids.append(chapterId)
@ -218,24 +231,14 @@ class EPubFanficWriter(FanficWriter):
# logging.d('Toc and refs printed, proceeding to ref-ids....')
# print >> toc, TOC_END
# print >> opf, CONTENT_END_MANIFEST
self._writeFile(tocFilePath, TOC_END)
self._writeFile(opfFilePath, CONTENT_END_MANIFEST)
for chapterId in ids:
# print >> opf, CONTENT_ITEMREF % chapterId
self._writeFile(opfFilePath, CONTENT_ITEMREF % chapterId)
# print >> opf, CONTENT_END
self._writeFile(opfFilePath, CONTENT_END)
# opf.close()
# toc.close()
# print('Finished')
self._closeFiles()
filename = self.directory + '.epub'

View file

@ -50,7 +50,11 @@ def inMemoryZip(files):
data = files[path]
# logging.debug(data)
memzip.writestr(path, data.encode('utf-8'))
logging.debug("Writing ZIP path %s" % path)
try:
memzip.writestr(path, data.encode('utf-8'))
except UnicodeDecodeError, e:
memzip.writestr(path.encode('utf-8'), data.encode('utf-8'))
for zf in memzip.filelist:
zf.create_system = 0