diff --git a/adapter.py b/adapter.py
index 054267b9..9c1d6d5a 100644
--- a/adapter.py
+++ b/adapter.py
@@ -3,6 +3,11 @@
import logging
import datetime
+try:
+ from google.appengine.api.urlfetch import fetch as googlefetch
+ appEngineGlob = True
+except ImportError:  # App Engine SDK not importable: run in non-App-Engine mode
+ appEngineGlob = False
class LoginRequiredException(Exception):
def __init__(self, url):
@@ -12,11 +17,7 @@ class LoginRequiredException(Exception):
return repr(self.url + ' requires user to be logged in')
class FanfictionSiteAdapter:
- try:
- from google.appengine.api.urlfetch import fetch as googlefetch
- appEngine = True
- except:
- appEngine = False
+ appEngine = appEngineGlob
login = ''
password = ''
url = ''
@@ -30,6 +31,7 @@ class FanfictionSiteAdapter:
authorURL = ''
outputStorySep = '-Ukn_'
outputName = ''
+ outputFileName = ''
storyDescription = ''
storyCharacters = []
storySeries = ''
@@ -57,6 +59,12 @@ class FanfictionSiteAdapter:
def hasAppEngine(self):
return self.appEngine
+ def fetchUrl(self, url):  # return the body of `url` via self.opener or App Engine's urlfetch
+ if not self.appEngine:
+ return self.opener.open(url).read().decode('utf-8')  # opener path: body is decoded as UTF-8
+ else:
+ return googlefetch(url).content  # App Engine path: .content is returned as-is, no decode here
+
def requiresLogin(self, url = None):
return False
@@ -86,9 +94,14 @@ class FanfictionSiteAdapter:
def getOutputName(self):
self.outputName = self.storyName.replace(" ", "_") + self.outputStorySep + self.storyId
- logging.debug('self.storyId=%s, self.storyName=%s self.outputName=%s' % (self.storyId, self.storyName, self.outputName))
+ logging.debug('self.outputName=%s' % self.outputName)
return self.outputName
+ def getOutputFileName(self, booksDirectory, format):  # builds booksDirectory + "/" + getOutputName() + "." + format
+ self.outputFileName = booksDirectory + "/" + self.getOutputName() + "." + format
+ logging.debug('self.outputFileName=%s' % self.outputFileName)
+ return self.outputFileName  # return the attribute assigned above (outputNameFileName does not exist)
+
def getAuthorURL(self):
logging.debug('self.authorURL=%s' % self.authorURL)
return self.authorURL
diff --git a/downloader.py b/downloader.py
index 310eac41..ee0120f3 100644
--- a/downloader.py
+++ b/downloader.py
@@ -55,10 +55,13 @@ class FanficLoader:
urls = self.adapter.extractIndividualUrls()
- s = self.booksDirectory + "/" + self.adapter.getOutputName() + "." + format
- if not self.overWrite and os.path.isfile(s):
- print >> sys.stderr, "File " + s + " already exists! Skipping!"
- exit(10)
+ if self.adapter.hasAppEngine():  # must be called: the bare bound method is always truthy
+ self.overWrite = True
+ else:
+ s = self.adapter.getOutputFileName(self.booksDirectory, format)
+ if not self.overWrite and os.path.isfile(s):
+ print >> sys.stderr, "File " + s + " already exists! Skipping!"
+ exit(10)
self.writer = self.writerClass(self.booksDirectory, self.adapter, inmemory=self.inmemory, compress=self.compress)
diff --git a/ffnet.py b/ffnet.py
index a036988e..d0eff9e7 100644
--- a/ffnet.py
+++ b/ffnet.py
@@ -143,14 +143,8 @@ class FFNet(FanfictionSiteAdapter):
self.addSubject(subj)
return True
- def _fetchUrl(self, url):
- if not self.appEngine:
- return self.opener.open(url).read().decode('utf-8')
- else:
- return googlefetch(url).content
-
def extractIndividualUrls(self):
- data = self._fetchUrl(self.url)
+ data = self.fetchUrl(self.url)
d2 = re.sub('&\#[0-9]+;', ' ', data)
soup = bs.BeautifulStoneSoup(d2)
allA = soup.findAll('a')
@@ -264,7 +258,7 @@ class FFNet(FanfictionSiteAdapter):
def getText(self, url):
time.sleep( 2.0 )
- data = self._fetchUrl(url)
+ data = self.fetchUrl(url)
lines = data.split('\n')
textbuf = ''
diff --git a/fpcom.py b/fpcom.py
index 04266888..b9431322 100644
--- a/fpcom.py
+++ b/fpcom.py
@@ -136,15 +136,9 @@ class FPCom(FanfictionSiteAdapter):
if len(subj) > 0:
self.addSubject(subj)
return True
-
- def _fetchUrl(self, url):
- if not self.appEngine:
- return self.opener.open(url).read().decode('utf-8')
- else:
- return googlefetch(url).content
def extractIndividualUrls(self):
- data = self._fetchUrl(self.url)
+ data = self.fetchUrl(self.url)
d2 = re.sub('&\#[0-9]+;', ' ', data)
soup = bs.BeautifulStoneSoup(d2)
allA = soup.findAll('a')
@@ -283,7 +277,7 @@ class FPCom(FanfictionSiteAdapter):
def getText(self, url):
time.sleep( 2.0 )
- data = self._fetchUrl(url)
+ data = self.fetchUrl(url)
lines = data.split('\n')
textbuf = ''
diff --git a/mediaminer.py b/mediaminer.py
index dd2ef3ea..77ff29bd 100644
--- a/mediaminer.py
+++ b/mediaminer.py
@@ -143,14 +143,8 @@ class MediaMiner(FanfictionSiteAdapter):
self.addSubject(subj)
return True
- def _fetchUrl(self, url):
- if not self.appEngine:
- return self.opener.open(url).read().decode('utf-8')
- else:
- return googlefetch(url).content
-
def extractIndividualUrls(self):
- data = self._fetchUrl(self.url)
+ data = self.fetchUrl(self.url)
#data.replace('
',' ').replace('
',' ').replace('',' ')
soup = bs.BeautifulSoup(data)
#logging.debug('soap=%s' % soup)
@@ -318,7 +312,7 @@ class MediaMiner(FanfictionSiteAdapter):
def getText(self, url):
time.sleep( 2.0 )
logging.debug('url=%s' % url)
- data = self._fetchUrl(url)
+ data = self.fetchUrl(url)
try:
soup = bs.BeautifulSoup(data)