Fix up exception handling; first working App Engine (SDK) version.

This commit is contained in:
Jim Miller 2011-05-04 17:54:36 -05:00
parent 547411666d
commit 94669a2179
16 changed files with 159 additions and 149 deletions

View file

@ -1,6 +1,6 @@
# fanfictionloader
application: fanfictionloader
version: 3-0-2
application: ffd-retief
version: 4-0-0
runtime: python
api_version: 1

View file

@ -55,7 +55,7 @@ safe_filename: true
extratags: FanFiction
## number of seconds to sleep between calls to the story site.
slow_down_sleep_time:0.5
#slow_down_sleep_time:0.5
## Each output format has a section that overrides [defaults]
@ -87,7 +87,7 @@ wide_titlepage_entries: description, storyUrl, author URL
## Each site has a section that overrides [defaults] *and* the format section
[test1.com]
titlepage_entries: title,description,category,genre, status,dateCreated,rating,numChapters,numWords,extratags,description,storyUrl,extratags
#titlepage_entries: title,description,category,genre, status,dateCreated,rating,numChapters,numWords,extratags,description,storyUrl,extratags
extratags: FanFiction,Testing
## If necessary, you can define [<site>:<format>] sections to customize

View file

@ -5,6 +5,8 @@ from os.path import dirname, basename, normpath
import logging
import urlparse as up
import fanficdownloader.exceptions as exceptions
## This bit of complexity allows adapters to be added by just adding
## the source file. It eliminates the long if/else clauses we used to
## need to pick out the adapter.
@ -21,11 +23,10 @@ def getAdapter(config,url):
adapter = cls(config,url) # raises InvalidStoryURL
return adapter
# No adapter found.
raise UnknownSite( url, (cls.getSiteDomain() for cls in __class_list) )
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
## Automatically import each adapter_*.py file.
## Each must call _register_handler() with their class to be
## registered.
## Each must implement getClass() to return their class
filelist = glob.glob(dirname(__file__)+'/adapter_*.py')
sys.path.insert(0,normpath(dirname(__file__)))

View file

@ -5,8 +5,10 @@ import datetime
import logging
import re
import urllib2
import time
import fanficdownloader.BeautifulSoup as bs
import fanficdownloader.exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup
@ -50,7 +52,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
soup = bs.BeautifulSoup(self._fetchUrl(url))
except urllib2.HTTPError, e:
if e.code == 404:
raise adapters.StoryDoesNotExist(self.url)
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
@ -166,14 +168,16 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
time.sleep(0.5) ## ffnet tends to fail more if hit too fast.
## This is in addition to whatever the
## slow_down_sleep_time setting is.
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
span = soup.find('div', {'id' : 'storytext'})
if None == span:
raise adapters.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)

File diff suppressed because one or more lines are too long

View file

@ -9,6 +9,7 @@ import urllib2
import fanficdownloader.BeautifulSoup as bs
from fanficdownloader.htmlcleanup import stripHTML
import fanficdownloader.exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup
@ -74,7 +75,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
if self.needToLoginCheck(d) :
logging.info("Failed to login to URL %s as %s" % (loginUrl,
data['penname']))
raise adapters.FailedToLogin(url,data['penname'])
raise exceptions.FailedToLogin(url,data['penname'])
return False
else:
return True
@ -88,7 +89,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
data = self._fetchUrl(url)
except urllib2.HTTPError, e:
if e.code == 404:
raise adapters.StoryDoesNotExist(self.url)
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
@ -190,7 +191,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
span = soup.find('div', {'id' : 'story'})
if None == span:
raise adapters.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)

View file

@ -7,6 +7,7 @@ import re
import urllib2
import fanficdownloader.BeautifulSoup as bs
import fanficdownloader.exceptions as exceptions
from base_adapter import BaseSiteAdapter, utf8FromSoup
@ -44,7 +45,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
soup = bs.BeautifulSoup(self._fetchUrl(url))
except urllib2.HTTPError, e:
if e.code == 404:
raise adapters.StoryDoesNotExist(self.url)
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
@ -173,7 +174,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
span = soup.find('span', {'style' : 'font-size: 100%;'})
if None == span:
raise adapters.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)

View file

@ -9,7 +9,7 @@ import urlparse as up
from fanficdownloader.story import Story
from fanficdownloader.configurable import Configurable
from fanficdownloader.htmlcleanup import removeEntities, removeAllEntities, stripHTML
from fanficdownloader.adapters.exceptions import InvalidStoryURL
from fanficdownloader.exceptions import InvalidStoryURL
class BaseSiteAdapter(Configurable):
@ -29,6 +29,7 @@ class BaseSiteAdapter(Configurable):
self.addConfigSection(self.getSiteDomain())
self.opener = u2.build_opener(u2.HTTPCookieProcessor())
self.storyDone = False
self.metadataDone = False
self.story = Story()
self.story.setMetadata('site',self.getSiteDomain())
self.story.setMetadata('dateCreated',datetime.datetime.now())
@ -58,13 +59,19 @@ class BaseSiteAdapter(Configurable):
# Does the download the first time it's called.
def getStory(self):
if not self.storyDone:
self.extractChapterUrlsAndMetadata()
self.getStoryMetadataOnly()
for (title,url) in self.chapterUrls:
self.story.addChapter(removeEntities(title),
removeEntities(self.getChapterText(url)))
self.storyDone = True
return self.story
def getStoryMetadataOnly(self):
if not self.metadataDone:
self.extractChapterUrlsAndMetadata()
self.metadataDone = True
return self.story
###############################
@staticmethod

View file

@ -14,7 +14,7 @@ class InvalidStoryURL(Exception):
self.example=example
def __str__(self):
return "Bad Story URL: %s\nFor site: %s\nExample: %s" % (self.url, self.domain, self.example)
return "Bad Story URL: (%s) for site: (%s) Example: (%s)" % (self.url, self.domain, self.example)
class FailedToLogin(Exception):
def __init__(self,url,username):
@ -22,14 +22,14 @@ class FailedToLogin(Exception):
self.username=username
def __str__(self):
return "Failed to Login for URL: %s with username: %s" % (self.url, self.username)
return "Failed to Login for URL: (%s) with username: (%s)" % (self.url, self.username)
class StoryDoesNotExist(Exception):
def __init__(self,url):
self.url=url
def __str__(self):
return "Story Does Not Exit: " + self.url
return "Story does not exist: (%s)" % self.url
class UnknownSite(Exception):
def __init__(self,url,supported_sites_list):
@ -37,5 +37,5 @@ class UnknownSite(Exception):
self.supported_sites_list=supported_sites_list
def __str__(self):
return "Unknown Site("+self.url+"). Supported sites: "+", ".join(self.supported_sites_list)
return "Unknown Site(%s). Supported sites: (%s)" % (self.url, ", ".join(self.supported_sites_list))

View file

@ -65,8 +65,8 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
<div id='typeoptions'>
<input type='radio' name='format' value='epub' checked>EPub</input>
<input type='radio' name='format' value='html'>HTML</input>
<input type='radio' name='format' value='text'>Plain Text</input>
<input type='radio' name='format' value='mobi'>Mobi (Kindle)</input>
<input type='radio' name='format' value='txt'>Plain Text</input>
<p><i>For Mobi (Kindle) select EPub and Convert it.</i></p>
</div>
</div>

224
main.py
View file

@ -15,10 +15,15 @@
# limitations under the License.
#
import logging
logging.getLogger().setLevel(logging.DEBUG)
import os
from os.path import dirname, basename, normpath
import sys
import zlib
import logging
import urllib
import traceback
import StringIO
@ -42,6 +47,9 @@ from fanficdownloader.zipdir import *
from ffstorage import *
from fanficdownloader import adapters, writers
import ConfigParser
class LoginRequired(webapp.RequestHandler):
def get(self):
user = users.get_current_user()
@ -104,29 +112,29 @@ class FileServer(webapp.RequestHandler):
name = fanfic.name.encode('utf-8')
name = makeAcceptableFilename(name)
#name = urllib.quote(name)
logging.info("Serving file: %s" % name)
if fanfic.format == 'epub':
if name.endswith('.epub'):
self.response.headers['Content-Type'] = 'application/epub+zip'
self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.epub'
elif fanfic.format == 'html':
elif name.endswith('.html'):
self.response.headers['Content-Type'] = 'text/html'
self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.html.zip'
elif fanfic.format == 'text':
elif name.endswith('.txt'):
self.response.headers['Content-Type'] = 'text/plain'
self.response.headers['Content-disposition'] = 'attachment; filename=' +name + '.txt.zip'
elif fanfic.format == 'mobi':
self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook'
self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi'
elif name.endswith('.zip'):
self.response.headers['Content-Type'] = 'application/zip'
else:
self.response.headers['Content-Type'] = 'application/octet-stream'
self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name
data = DownloadData.all().filter("download =", fanfic).order("index")
# epub, txt and html are all already compressed.
# epubs are all already compressed.
# Each chunk is compress individually to avoid having
# to hold the whole in memory just for the
# compress/uncompress
if fanfic.format == 'mobi':
if fanfic.format != 'epub':
def dc(data):
try:
return zlib.decompress(data)
@ -230,18 +238,47 @@ class FanfictionDownloader(webapp.RequestHandler):
download.user = user
download.url = url
download.format = format
download.put()
adapter = None
taskqueue.add(url='/fdowntask',
queue_name="download",
params={'format':format,
'url':url,
'login':login,
'password':password,
'user':user.email()})
try:
config = ConfigParser.ConfigParser()
logging.debug('reading defaults.ini config file, if present')
config.read('defaults.ini')
logging.debug('reading appengine.ini config file, if present')
config.read('appengine.ini')
adapter = adapters.getAdapter(config,url)
logging.info('Created an adapter: %s' % adapter)
if len(login) > 1:
adapter.username=login
adapter.password=password
## This scrapes the metadata, which will be
## duplicated in the queue task, but it
## detects bad URLs, bad login, bad story, etc
## without waiting for the queue. So I think
## it's worth the double up. Could maybe save
## it all in the download object someday.
story = adapter.getStoryMetadataOnly()
download.title = story.getMetadata('title')
download.author = story.getMetadata('author')
download.put()
taskqueue.add(url='/fdowntask',
queue_name="download",
params={'format':format,
'url':url,
'login':login,
'password':password,
'user':user.email()})
logging.info("enqueued download key: " + str(download.key()))
except Exception, e:
logging.exception(e)
download.failure = str(e)
download.put()
logging.info("enqueued download key: " + str(download.key()))
self.redirect('/status?id='+str(download.key()))
return
@ -289,120 +326,67 @@ class FanfictionDownloaderTask(webapp.RequestHandler):
logging.info('Creating adapter...')
try:
if url.find('fictionalley') != -1:
adapter = fictionalley.FictionAlley(url)
elif url.find('ficwad') != -1:
adapter = ficwad.FicWad(url)
elif url.find('fanfiction.net') != -1:
adapter = ffnet.FFNet(url)
elif url.find('fictionpress.com') != -1:
adapter = fpcom.FPCom(url)
elif url.find('harrypotterfanfiction.com') != -1:
adapter = hpfiction.HPFiction(url)
elif url.find('twilighted.net') != -1:
adapter = twilighted.Twilighted(url)
elif url.find('twiwrite.net') != -1:
adapter = twiwrite.Twiwrite(url)
elif url.find('adastrafanfic.com') != -1:
adapter = adastrafanfic.Adastrafanfic(url)
elif url.find('whofic.com') != -1:
adapter = whofic.Whofic(url)
elif url.find('potionsandsnitches.net') != -1:
adapter = potionsNsnitches.PotionsNSnitches(url)
elif url.find('mediaminer.org') != -1:
adapter = mediaminer.MediaMiner(url)
else:
logging.debug("Bad URL detected")
download.failure = url +" is not a valid story URL."
download.put()
return
config = ConfigParser.ConfigParser()
logging.debug('reading defaults.ini config file, if present')
config.read('defaults.ini')
logging.debug('reading appengine.ini config file, if present')
config.read('appengine.ini')
adapter = adapters.getAdapter(config,url)
except Exception, e:
logging.exception(e)
download.failure = "Adapter was not created: " + str(e)
download.failure = str(e)
download.put()
return
logging.info('Created an adapter: %s' % adapter)
if len(login) > 1:
adapter.setLogin(login)
adapter.setPassword(password)
adapter.username=login
adapter.password=password
if format == 'epub':
writerClass = output.EPubFanficWriter
elif format == 'html':
writerClass = output.HTMLWriter
elif format == 'mobi':
writerClass = output.MobiWriter
else:
writerClass = output.TextWriter
loader = FanficLoader(adapter,
writerClass,
quiet = True,
inmemory=True,
compress=False)
try:
data = loader.download()
if format == 'html' or format == 'text':
# data is uncompressed hence huge
ext = '.html'
if format == 'text':
ext = '.txt'
logging.debug(data)
files = {makeAcceptableFilename(str(adapter.getOutputName())) + ext : StringIO.StringIO(data.decode('utf-8')) }
d = inMemoryZip(files)
data = d.getvalue()
except LoginRequiredException, e:
logging.exception(e)
download.failure = 'Login problem detected'
download.put()
return
# adapter.getStory() is what does all the heavy lifting.
writer = writers.getWriter(format,config,adapter.getStory())
except Exception, e:
logging.exception(e)
download.failure = 'Some exception happened in downloader: ' + str(e)
download.failure = str(e)
download.put()
return
if data == None:
if loader.badLogin:
logging.debug("Bad login detected")
download.failure = 'Login failed'
download.put()
return
download.failure = 'No data returned by adaptor'
download.put()
else:
download.name = self._printableVersion(adapter.getOutputName())
download.title = self._printableVersion(adapter.getStoryName())
download.author = self._printableVersion(adapter.getAuthorName())
download.put()
index=0
download.name = writer.getOutputFileName()
download.title = adapter.getStory().getMetadata('title')
download.author = adapter.getStory().getMetadata('author')
download.put()
index=0
# epub, txt and html are all already compressed.
# Each chunk is compressed individually to avoid having
# to hold the whole in memory just for the
# compress/uncompress.
if format == 'mobi':
def c(data):
return zlib.compress(data)
else:
def c(data):
return data
while( len(data) > 0 ):
DownloadData(download=download,
index=index,
blob=c(data[:1000000])).put()
index += 1
data = data[1000000:]
download.completed=True
download.put()
outbuffer = StringIO.StringIO()
writer.writeStory(outbuffer)
data = outbuffer.getvalue()
outbuffer.close()
del writer
del adapter
# epubs are all already compressed.
# Each chunk is compressed individually to avoid having
# to hold the whole in memory just for the
# compress/uncompress.
if format != 'epub':
def c(data):
return zlib.compress(data)
else:
def c(data):
return data
logging.info("Download finished OK")
while( len(data) > 0 ):
DownloadData(download=download,
index=index,
blob=c(data[:1000000])).put()
index += 1
data = data[1000000:]
download.completed=True
download.put()
logging.info("Download finished OK")
return
def toPercentDecimal(match):

View file

@ -6,7 +6,7 @@ logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lin
import sys, os
import getpass
from fanficdownloader import adapters,writers
from fanficdownloader import adapters,writers,exceptions
import ConfigParser
@ -27,7 +27,7 @@ try:
try:
print adapter.getStory()
except adapters.FailedToLogin, ftl:
except exceptions.FailedToLogin, ftl:
print "Login Failed, Need Username/Password."
sys.stdout.write("Username: ")
adapter.username = sys.stdin.readline().strip()
@ -40,9 +40,9 @@ try:
writeStory(adapter,"txt")
del adapter
except adapters.InvalidStoryURL, isu:
except exceptions.InvalidStoryURL, isu:
print isu
except adapters.StoryDoesNotExist, dne:
except exceptions.StoryDoesNotExist, dne:
print dne
except adapters.UnknownSite, us:
except exceptions.UnknownSite, us:
print us

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.