Changes to allow the refactored fetch functions to work from within App Engine.

This commit is contained in:
wsuetholz 2010-11-12 21:28:25 -06:00
parent 99ba1b48aa
commit c77498e6ea
5 changed files with 32 additions and 34 deletions

View file

@@ -3,6 +3,11 @@
import logging import logging
import datetime import datetime
# Detect Google App Engine by probing for its urlfetch service.  The flag is
# read by FanfictionSiteAdapter so fetchUrl() can pick the right HTTP
# mechanism at runtime.
try:
    from google.appengine.api.urlfetch import fetch as googlefetch
    appEngineGlob = True
except ImportError:
    # BUG FIX: was a bare `except:`, which would also swallow SystemExit /
    # KeyboardInterrupt and any unrelated error raised during import.  Only
    # a failed import means "not running on App Engine".
    appEngineGlob = False
class LoginRequiredException(Exception): class LoginRequiredException(Exception):
def __init__(self, url): def __init__(self, url):
@@ -12,11 +17,7 @@ class LoginRequiredException(Exception):
return repr(self.url + ' requires user to be logged in') return repr(self.url + ' requires user to be logged in')
class FanfictionSiteAdapter: class FanfictionSiteAdapter:
try: appEngine = appEngineGlob
from google.appengine.api.urlfetch import fetch as googlefetch
appEngine = True
except:
appEngine = False
login = '' login = ''
password = '' password = ''
url = '' url = ''
@@ -30,6 +31,7 @@ class FanfictionSiteAdapter:
authorURL = '' authorURL = ''
outputStorySep = '-Ukn_' outputStorySep = '-Ukn_'
outputName = '' outputName = ''
outputFileName = ''
storyDescription = '' storyDescription = ''
storyCharacters = [] storyCharacters = []
storySeries = '' storySeries = ''
@@ -57,6 +59,12 @@ class FanfictionSiteAdapter:
def hasAppEngine(self): def hasAppEngine(self):
return self.appEngine return self.appEngine
def fetchUrl(self, url):
    """Fetch *url* and return the response body.

    Uses Google's urlfetch service when running on App Engine; otherwise
    uses the adapter's urllib opener and decodes the response as UTF-8.
    """
    if self.appEngine:
        return googlefetch(url).content
    response = self.opener.open(url)
    return response.read().decode('utf-8')
def requiresLogin(self, url = None): def requiresLogin(self, url = None):
return False return False
@@ -86,9 +94,14 @@ class FanfictionSiteAdapter:
def getOutputName(self): def getOutputName(self):
self.outputName = self.storyName.replace(" ", "_") + self.outputStorySep + self.storyId self.outputName = self.storyName.replace(" ", "_") + self.outputStorySep + self.storyId
logging.debug('self.storyId=%s, self.storyName=%s self.outputName=%s' % (self.storyId, self.storyName, self.outputName)) logging.debug('self.outputName=%s' % self.outputName)
return self.outputName return self.outputName
def getOutputFileName(self, booksDirectory, format):
    """Build, cache, and return the full output path.

    The path is ``<booksDirectory>/<outputName>.<format>``, where the base
    name comes from getOutputName().
    """
    self.outputFileName = booksDirectory + "/" + self.getOutputName() + "." + format
    logging.debug('self.outputFileName=%s' % self.outputFileName)
    # BUG FIX: the original returned self.outputNameFileName, an attribute
    # that is never assigned anywhere, so every call raised AttributeError.
    return self.outputFileName
def getAuthorURL(self): def getAuthorURL(self):
logging.debug('self.authorURL=%s' % self.authorURL) logging.debug('self.authorURL=%s' % self.authorURL)
return self.authorURL return self.authorURL

View file

@@ -55,7 +55,10 @@ class FanficLoader:
urls = self.adapter.extractIndividualUrls() urls = self.adapter.extractIndividualUrls()
s = self.booksDirectory + "/" + self.adapter.getOutputName() + "." + format if (self.adapter.hasAppEngine):
self.overWrite = True
else:
s = self.adapter.getOutputFileName(self.booksDirectory, format)
if not self.overWrite and os.path.isfile(s): if not self.overWrite and os.path.isfile(s):
print >> sys.stderr, "File " + s + " already exists! Skipping!" print >> sys.stderr, "File " + s + " already exists! Skipping!"
exit(10) exit(10)

View file

@@ -143,14 +143,8 @@ class FFNet(FanfictionSiteAdapter):
self.addSubject(subj) self.addSubject(subj)
return True return True
def _fetchUrl(self, url):
if not self.appEngine:
return self.opener.open(url).read().decode('utf-8')
else:
return googlefetch(url).content
def extractIndividualUrls(self): def extractIndividualUrls(self):
data = self._fetchUrl(self.url) data = self.fetchUrl(self.url)
d2 = re.sub('&\#[0-9]+;', ' ', data) d2 = re.sub('&\#[0-9]+;', ' ', data)
soup = bs.BeautifulStoneSoup(d2) soup = bs.BeautifulStoneSoup(d2)
allA = soup.findAll('a') allA = soup.findAll('a')
@@ -264,7 +258,7 @@ class FFNet(FanfictionSiteAdapter):
def getText(self, url): def getText(self, url):
time.sleep( 2.0 ) time.sleep( 2.0 )
data = self._fetchUrl(url) data = self.fetchUrl(url)
lines = data.split('\n') lines = data.split('\n')
textbuf = '' textbuf = ''

View file

@@ -137,14 +137,8 @@ class FPCom(FanfictionSiteAdapter):
self.addSubject(subj) self.addSubject(subj)
return True return True
def _fetchUrl(self, url):
if not self.appEngine:
return self.opener.open(url).read().decode('utf-8')
else:
return googlefetch(url).content
def extractIndividualUrls(self): def extractIndividualUrls(self):
data = self._fetchUrl(self.url) data = self.fetchUrl(self.url)
d2 = re.sub('&\#[0-9]+;', ' ', data) d2 = re.sub('&\#[0-9]+;', ' ', data)
soup = bs.BeautifulStoneSoup(d2) soup = bs.BeautifulStoneSoup(d2)
allA = soup.findAll('a') allA = soup.findAll('a')
@@ -283,7 +277,7 @@ class FPCom(FanfictionSiteAdapter):
def getText(self, url): def getText(self, url):
time.sleep( 2.0 ) time.sleep( 2.0 )
data = self._fetchUrl(url) data = self.fetchUrl(url)
lines = data.split('\n') lines = data.split('\n')
textbuf = '' textbuf = ''

View file

@@ -143,14 +143,8 @@ class MediaMiner(FanfictionSiteAdapter):
self.addSubject(subj) self.addSubject(subj)
return True return True
def _fetchUrl(self, url):
if not self.appEngine:
return self.opener.open(url).read().decode('utf-8')
else:
return googlefetch(url).content
def extractIndividualUrls(self): def extractIndividualUrls(self):
data = self._fetchUrl(self.url) data = self.fetchUrl(self.url)
#data.replace('<br />',' ').replace('<br>',' ').replace('</br>',' ') #data.replace('<br />',' ').replace('<br>',' ').replace('</br>',' ')
soup = bs.BeautifulSoup(data) soup = bs.BeautifulSoup(data)
#logging.debug('soap=%s' % soup) #logging.debug('soap=%s' % soup)
@@ -318,7 +312,7 @@ class MediaMiner(FanfictionSiteAdapter):
def getText(self, url): def getText(self, url):
time.sleep( 2.0 ) time.sleep( 2.0 )
logging.debug('url=%s' % url) logging.debug('url=%s' % url)
data = self._fetchUrl(url) data = self.fetchUrl(url)
try: try:
soup = bs.BeautifulSoup(data) soup = bs.BeautifulSoup(data)