From 35065593afdf9cc5767f29f1536f92103fd7f940 Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Thu, 11 Nov 2010 13:14:15 -0600 Subject: [PATCH] Refactored source code to remove redundant code.. Moved most of the adapter get and set functions for variables into the adapter.py. Define all the adapter variables in adapter.py. Remove the unneeded functions from the various adapter sources. Fix a couple of problems with fpcom.py. Rearrange the Genre and Category for some of the adapters. --- adapter.py | 161 +++++++++++++++++++++------ ffnet.py | 173 ++-------------------------- fictionalley.py | 167 +++------------------------ ficwad.py | 178 ++++------------------------- fpcom.py | 266 ++++++++------------------------------------ hpfiction.py | 137 +---------------------- potionsNsnitches.py | 190 +++---------------------------- twilighted.py | 173 ++++------------------------ 8 files changed, 257 insertions(+), 1188 deletions(-) diff --git a/adapter.py b/adapter.py index 03c3b59d..d4f80cc5 100644 --- a/adapter.py +++ b/adapter.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +import logging +import datetime + + class LoginRequiredException(Exception): def __init__(self, url): self.url = url @@ -8,16 +12,56 @@ class LoginRequiredException(Exception): return repr(self.url + ' requires user to be logged in') class FanfictionSiteAdapter: + try: + from google.appengine.api.urlfetch import fetch as googlefetch + appEngine = True + except: + appEngine = False login = '' password = '' + url = '' + host = '' + path = '' + uuid = '' + storyName = '' + storyId = '' + authorName = '' + authorId = '' + authorURL = '' + outputStorySep = '-Ukn_' + outputName = '' + storyDescription = '' + storyCharacters = [] + storySeries = '' + storyPublished = datetime.date(1970, 01, 31) + storyCreated = datetime.datetime.now() + storyUpdated = datetime.date(1970, 01, 31) + languageId = 'en-UK' + language = 'English' + subjects = [] + publisher = '' + numChapters = '0' + numWords = '0' + genre = '' + category = '' + storyStatus = 'In-Progress' + storyRating = '' + storyUserRating = '0' def __init__(self, url): - pass + # basic plain url parsing... + self.url = url + parsedUrl = up.urlparse(url) + self.host = parsedUrl.netloc + self.path = parsedUrl.path + + def hasAppEngine(self): + return self.appEngine def requiresLogin(self, url = None): - pass + return False def performLogin(self, url = None): - pass + return True def extractIndividualUrls(self): pass @@ -26,88 +70,133 @@ class FanfictionSiteAdapter: pass def setLogin(self, login): - pass + self.login = login def setPassword(self, password): - pass + self.password = password def getHost(self): - pass + logging.debug('self.host=%s' % self.host) + return self.host - def getStoryURL(self): - pass - def getUUID(self): - pass + self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId + logging.debug('self.uuid=%s' % self.uuid) + return self.uuid def getOutputName(self): - pass + self.outputName = self.storyName.replace(" ", "_") + self.outputStorySep + self.storyId + logging.debug('self.storyId=%s, self.storyName=%s self.outputName=%s' % (self.storyId, self.storyName, self.outputName)) + return self.outputName def getAuthorURL(self): - pass + logging.debug('self.authorURL=%s' % self.authorURL) + return self.authorURL def getAuthorId(self): - pass + logging.debug('self.authorId=%s' % self.authorId) + return self.authorId def getAuthorName(self): - pass + logging.debug('self.authorName=%s' % self.authorName) + return self.authorName + + def getStoryURL(self): + logging.debug('self.url=%s' % self.url) + return self.url def getStoryId(self): - pass + logging.debug('self.storyId=%s' % self.storyId) + return self.storyId def getStoryName(self): - pass + logging.debug('self.storyName=%s' % self.storyName) + return self.storyName def getStoryDescription(self): - pass + logging.debug('self.storyDescription=%s' % self.storyDescription) + return self.storyDescription def getStoryCreated(self): - pass + self.storyCreated = datetime.datetime.now() + logging.debug('self.storyCreated=%s' % self.storyCreated) + return self.storyCreated + def addCharacter(self, character): + chara = character.upper() + for c in self.storyCharacters: + if c.upper() == chara: + return False + self.storyCharacters.append(character) + return True + + def getStoryCharacters(self): + logging.debug('self.storyCharacters=%s' % self.storyCharacters) + return self.storyCharacters + def getStoryPublished(self): - pass + logging.debug('self.storyPublished=%s' % self.storyPublished) + return self.storyPublished def getStoryUpdated(self): - pass + logging.debug('self.storyUpdated=%s' % self.storyUpdated) + return self.storyUpdated def getStorySeries(self): - pass + logging.debug('self.storySeries=%s' % self.storySeries) + return self.storySeries def getLanguage(self): - pass + logging.debug('self.language=%s' % self.language) + return self.language def getLanguageId(self): - pass + logging.debug('self.languageId=%s' % self.languageId) + return self.languageId + + def addSubject(self, subject): + subj = subject.upper() + for s in self.subjects: + if s.upper() == subj: + return False + self.subjects.append(subject) + return True def getSubjects(self): - pass - - def getCharacters(self): - pass + logging.debug('self.subjects=%s' % self.authorName) + return self.subjects def getPublisher(self): - pass + logging.debug('self.publisher=%s' % self.publisher) + return self.publisher def getNumChapters(self): - pass + logging.debug('self.numChapters=%s' % self.numChapters) + return self.numChapters def getNumWords(self): - pass + logging.debug('self.numWords=%s' % self.numWords) + return self.numWords def getCategory(self): - pass + logging.debug('self.category=%s' % self.category) + return self.category def getGenre(self): - pass + logging.debug('self.genre=%s' % self.genre) + return self.genre def getStoryStatus(self): - pass + logging.debug('self.storyStatus=%s' % self.storyStatus) + return self.storyStatus def getStoryRating(self): - pass + logging.debug('self.storyRating=%s' % self.storyRating) + return self.storyRating def getStoryUserRating(self): - pass + logging.debug('self.storyUserRating=%s' % self.storyUserRating) + return self.storyUserRating def getPrintableUrl(self, url): - pass + return url diff --git a/ffnet.py b/ffnet.py index 3f9f41e5..a036988e 100644 --- a/ffnet.py +++ b/ffnet.py @@ -27,12 +27,6 @@ except: # tough luck pass -try: - from google.appengine.api.urlfetch import fetch as googlefetch - appEngine = True -except: - appEngine = False - class FFNet(FanfictionSiteAdapter): def __init__(self, url): self.url = url @@ -42,7 +36,6 @@ class FFNet(FanfictionSiteAdapter): self.storyName = 'FF.Net story' self.authorName = 'FF.Net author' - self.outputName = 'FF.Net_story' self.storyDescription = 'Fanfiction Story' self.storyCharacters = [] self.storySeries = '' @@ -65,6 +58,8 @@ class FFNet(FanfictionSiteAdapter): self.storyStatus = 'In-Progress' self.storyRating = 'K' self.storyUserRating = '0' + self.outputName = '' + self.outputStorySep = '-ffnet_' logging.debug('self.path=%s' % self.path) @@ -85,18 +80,14 @@ class FFNet(FanfictionSiteAdapter): chapter = '1' if len(spl) == 5: self.path = "/".join(spl[1:-1]) - self.outputName = spl[4] + '-ffnet_' + spl[2] if self.path.endswith('/'): self.path = self.path[:-1] logging.debug('self.path=%s' % self.path) - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) - - logging.debug('self.storyId=%s, chapter=%s, self.outputName=%s' % (self.storyId, chapter, self.outputName)) - if not appEngine: + logging.debug('self.storyId=%s, chapter=%s' % (self.storyId, chapter)) + if not self.appEngine: self.opener = u2.build_opener(u2.HTTPCookieProcessor()) else: self.opener = None @@ -106,12 +97,6 @@ class FFNet(FanfictionSiteAdapter): def _getLoginScript(self): return self.path - def requiresLogin(self, url = None): - return False - - def performLogin(self, url = None): - return True - def _getVarValue(self, varstr): #logging.debug('_getVarValue varstr=%s' % varstr) vals = varstr.split('=') @@ -126,7 +111,7 @@ class FFNet(FanfictionSiteAdapter): def _splitCrossover(self, subject): if "Crossover" in subject: - self._addSubject ("Crossover") + self.addSubject ("Crossover") logging.debug('Crossover=%s' % subject) if subject.find(' and ') != -1: words = subject.split(' ') @@ -135,18 +120,18 @@ class FFNet(FanfictionSiteAdapter): for s in words: if s in "and Crossover": if len(subj) > 0: - self._addSubject(subj) + self.addSubject(subj) subj = '' else: if len(subj) > 0: subj = subj + ' ' subj = subj + s if len(subj) > 0: - self._addSubject(subj) + self.addSubject(subj) else: - self._addSubject(subject) + self.addSubject(subject) else: - self._addSubject(subject) + self.addSubject(subject) return True def _splitGenre(self, subject): @@ -155,28 +140,11 @@ class FFNet(FanfictionSiteAdapter): logging.debug('words=%s' % words) for subj in words: if len(subj) > 0: - self._addSubject(subj) - return True - - def _addSubject(self, subject): - subj = subject.upper() - for s in self.subjects: - if s.upper() == subj: - return False - - self.subjects.append(subject) - return True - - def _addCharacter(self, character): - chara = character.upper() - for c in self.storyCharacters: - if c.upper() == chara: - return False - self.storyCharacters.append(character) + self.addSubject(subj) return True def _fetchUrl(self, url): - if not appEngine: + if not self.appEngine: return self.opener.open(url).read().decode('utf-8') else: return googlefetch(url).content @@ -198,9 +166,7 @@ class FFNet(FanfictionSiteAdapter): if l.find("»") != -1 and l.find('') != -1: s2 = bs.BeautifulStoneSoup(l) self.storyName = str(s2.find('b').string) - # mangling storyName replaces url for outputName - self.outputName = self.storyName.replace(" ", "_") + '-ffnet_' + self.storyId - logging.debug('self.storyId=%s, self.storyName=%s, self.outputName=%s' % (self.storyId, self.storyName, self.outputName)) + logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName)) elif l.find(", remove before '-'. title = soup.find('title').string self.storyName = "-".join(title.split('-')[1:]).strip().replace(" (Story Text)","") - self.outputName = self.storyName.replace(" ", "_") + '-fa_' + self.storyId links = soup.findAll('li') @@ -226,24 +210,8 @@ class FictionAlley(FanfictionSiteAdapter): print('Story "%s" by %s' % (self.storyName, self.authorName)) - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) - return result - def getHost(self): - logging.debug('self.host=%s' % self.host) - return self.host - - def getStoryName(self): - return self.storyName - - def getAuthorName(self): - return self.authorName - - def getOutputName(self): - return self.outputName - def getText(self, url): # fictionalley uses full URLs in chapter list. data = self.opener.open(url).read() @@ -268,109 +236,6 @@ class FictionAlley(FanfictionSiteAdapter): else: return div.__str__('utf8').replace('crazytagstringnobodywouldstumbleonaccidently','div') - def getStoryURL(self): - logging.debug('self.url=%s' % self.url) - return self.url - - def getAuthorURL(self): - logging.debug('self.authorURL=%s' % self.authorURL) - return self.authorURL - - def getUUID(self): - logging.debug('self.uuid=%s' % self.uuid) - return self.uuid - - def getAuthorId(self): - logging.debug('self.authorId=%s' % self.authorId) - return self.authorId - - def getStoryId(self): - logging.debug('self.storyId=%s' % self.storyId) - return self.storyId - - def getStoryDescription(self): - logging.debug('self.storyDescription=%s' % self.storyDescription) - return self.storyDescription - - def getStoryPublished(self): - logging.debug('self.storyPublished=%s' % self.storyPublished) - return self.storyPublished - - def getStoryCreated(self): - self.storyCreated = datetime.datetime.now() - logging.debug('self.storyCreated=%s' % self.storyCreated) - return self.storyCreated - - def getStoryUpdated(self): - logging.debug('self.storyUpdated=%s' % self.storyUpdated) - return self.storyUpdated - - def getLanguage(self): - logging.debug('self.language=%s' % self.language) - return self.language - - def getLanguageId(self): - logging.debug('self.languageId=%s' % self.languageId) - return self.languageId - - def getSubjects(self): - logging.debug('self.subjects=%s' % self.authorName) - return self.subjects - - def getPublisher(self): - logging.debug('self.publisher=%s' % self.publisher) - return self.publisher - - def getNumChapters(self): - logging.debug('self.numChapters=%s' % self.numChapters) - return self.numChapters - - def getNumWords(self): - logging.debug('self.numWords=%s' % self.numWords) - return self.numWords - - def getCategory(self): - logging.debug('self.category=%s' % self.category) - return self.category - - def getGenre(self): - logging.debug('self.genre=%s' % self.genre) - return self.genre - - def getStoryStatus(self): - logging.debug('self.storyStatus=%s' % self.storyStatus) - return self.storyStatus - - def getStoryRating(self): - logging.debug('self.storyRating=%s' % self.storyRating) - return self.storyRating - - def getStoryUserRating(self): - logging.debug('self.storyUserRating=%s' % self.storyUserRating) - return self.storyUserRating - - def getPrintableUrl(self, url): - return url - - def getPasswordLine(self): - return 'opaopapassword' - - def getLoginScript(self): - return 'opaopaloginscript' - - def getLoginPasswordOthers(self): - login = dict(login = 'name', password = 'pass') - other = dict(submit = 'Log In', remember='yes') - return (login, other) - - def getStoryCharacters(self): - logging.debug('self.storyCharacters=%s' % self.storyCharacters) - return self.storyCharacters - - def getStorySeries(self): - logging.debug('self.storySeries=%s' % self.storySeries) - return self.storySeries - if __name__ == '__main__': diff --git a/ficwad.py b/ficwad.py index 8ea76b60..ddac91a8 100644 --- a/ficwad.py +++ b/ficwad.py @@ -21,36 +21,6 @@ class FicWad(FanfictionSiteAdapter): def __init__(self, url): self.url = url self.host = up.urlparse(url).netloc - - def requiresLogin(self, url = None): - return False - - def performLogin(self, url = None): - pass - - def setLogin(self, login): - self.login = login - - def setPassword(self, password): - self.password = password - - def _addSubject(self, subject): - subj = subject.upper() - for s in self.subjects: - if s.upper() == subj: - return False - self.subjects.append(subject) - return True - - def _addCharacter(self, character): - chara = character.upper() - for c in self.storyCharacters: - if c.upper() == chara: - return False - self.storyCharacters.append(character) - return True - - def extractIndividualUrls(self): self.storyDescription = 'Fanfiction Story' self.authorId = '0' self.storyId = '0' @@ -71,6 +41,21 @@ class FicWad(FanfictionSiteAdapter): self.storyUserRating = '0' self.storyCharacters = [] self.storySeries = '' + self.outputName = '' + self.outputStorySep = '-fw_' + + def getPasswordLine(self): + return 'opaopapassword' + + def getLoginScript(self): + return 'opaopaloginscript' + + def getLoginPasswordOthers(self): + login = dict(login = 'name', password = 'pass') + other = dict(submit = 'Log In', remember='yes') + return (login, other) + + def extractIndividualUrls(self): oldurl = '' data = u2.urlopen(self.url).read() @@ -98,9 +83,8 @@ class FicWad(FanfictionSiteAdapter): storyinfo = story.find('h4').find('a') (u0, u1, self.storyId) = storyinfo['href'].split('/') self.storyName = storyinfo.string.strip() - self.outputName = self.storyName.replace(" ", "_") + '-fw_' + self.storyId - logging.debug('self.storyName=%s, self.storyId=%s, self.outputName=%s' % (self.storyName, self.storyId, self.outputName)) + logging.debug('self.storyName=%s, self.storyId=%s' % (self.storyName, self.storyId)) author = soup.find('span', {'class' : 'author'}) self.authorName = str(author.a.string) @@ -134,7 +118,7 @@ class FicWad(FanfictionSiteAdapter): if self.category == 'Category': self.category = str(a.string) logging.debug('self.category=%s' % self.category) - self._addSubject(self.category) + self.addSubject(self.category) logging.debug('self.subjects=%s' % self.subjects) elif skey == 'Rating': self.storyRating = s1[1] @@ -144,12 +128,12 @@ class FicWad(FanfictionSiteAdapter): logging.debug('self.genre=%s' % self.genre) s2 = s1[1].split(', ') for ss2 in s2: - self._addSubject(ss2) + self.addSubject(ss2) logging.debug('self.subjects=%s' % self.subjects) elif skey == 'Characters': s2 = s1[1].split(', ') for ss2 in s2: - self._addCharacter(ss2) + self.addCharacter(ss2) logging.debug('self.storyCharacters=%s' % self.storyCharacters) elif skey == 'Chapters': self.numChapters = s1[1] @@ -208,6 +192,7 @@ class FicWad(FanfictionSiteAdapter): select = soup.find('select', { 'name' : 'goto' } ) if select is None: + self.numChapters = '1' result.append((self.url,self.storyName)) logging.debug('Chapter[%s]=%s %s' % (ii, self.url, self.storyName)) else: @@ -223,25 +208,12 @@ class FicWad(FanfictionSiteAdapter): ii = ii+1 else: logging.debug('Skipping Story Index. URL %s' % url) - - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) + + self.numChapters = str(ii) + logging.debug('self.numChapters=%s' % self.numChapters) return result - def getHost(self): - logging.debug('self.host=%s' % self.host) - return self.host - - def getStoryName(self): - return self.storyName - - def getOutputName(self): - return self.outputName - - def getAuthorName(self): - return self.authorName - def getText(self, url): if url.find('http://') == -1: url = 'http://' + self.host + '/' + url @@ -256,110 +228,6 @@ class FicWad(FanfictionSiteAdapter): return '' return div.__str__('utf8') - def getStoryURL(self): - logging.debug('self.url=%s' % self.url) - return self.url - - def getAuthorURL(self): - logging.debug('self.authorURL=%s' % self.authorURL) - return self.authorURL - - def getUUID(self): - logging.debug('self.uuid=%s' % self.uuid) - return self.uuid - - def getAuthorId(self): - logging.debug('self.authorId=%s' % self.authorId) - return self.authorId - - def getStoryId(self): - logging.debug('self.storyId=%s' % self.storyId) - return self.storyId - - def getStoryDescription(self): - logging.debug('self.storyDescription=%s' % self.storyDescription) - return self.storyDescription - - def getStoryPublished(self): - logging.debug('self.storyPublished=%s' % self.storyPublished) - return self.storyPublished - - def getStoryCreated(self): - self.storyCreated = datetime.datetime.now() - logging.debug('self.storyCreated=%s' % self.storyCreated) - return self.storyCreated - - def getStoryUpdated(self): - logging.debug('self.storyUpdated=%s' % self.storyUpdated) - return self.storyUpdated - - def getLanguage(self): - logging.debug('self.language=%s' % self.language) - return self.language - - def getLanguageId(self): - logging.debug('self.languageId=%s' % self.languageId) - return self.languageId - - def getSubjects(self): - logging.debug('self.subjects=%s' % self.authorName) - return self.subjects - - def getPublisher(self): - logging.debug('self.publisher=%s' % self.publisher) - return self.publisher - - def getNumChapters(self): - logging.debug('self.numChapters=%s' % self.numChapters) - return self.numChapters - - def getNumWords(self): - logging.debug('self.numWords=%s' % self.numWords) - return self.numWords - - def getCategory(self): - logging.debug('self.category=%s' % self.category) - return self.category - - def getGenre(self): - logging.debug('self.genre=%s' % self.genre) - return self.genre - - def getStoryStatus(self): - logging.debug('self.storyStatus=%s' % self.storyStatus) - return self.storyStatus - - def getStoryRating(self): - logging.debug('self.storyRating=%s' % self.storyRating) - return self.storyRating - - def getStoryUserRating(self): - logging.debug('self.storyUserRating=%s' % self.storyUserRating) - return self.storyUserRating - - def getPrintableUrl(self, url): - return url - - def getPasswordLine(self): - return 'opaopapassword' - - def getLoginScript(self): - return 'opaopaloginscript' - - def getLoginPasswordOthers(self): - login = dict(login = 'name', password = 'pass') - other = dict(submit = 'Log In', remember='yes') - return (login, other) - - def getStoryCharacters(self): - logging.debug('self.storyCharacters=%s' % self.storyCharacters) - return self.storyCharacters - - def getStorySeries(self): - logging.debug('self.storySeries=%s' % self.storySeries) - return self.storySeries - - if __name__ == '__main__': url = 'http://www.ficwad.com/story/14536' diff --git a/fpcom.py b/fpcom.py index 0389e9a9..04266888 100644 --- a/fpcom.py +++ b/fpcom.py @@ -27,23 +27,16 @@ except: # tough luck pass -try: - from google.appengine.api.urlfetch import fetch as googlefetch - appEngine = True -except: - appEngine = False - class FPCom(FanfictionSiteAdapter): - def __init__(self, url): + def __init__(self, url): self.url = url parsedUrl = up.urlparse(url) self.host = parsedUrl.netloc self.path = parsedUrl.path - self.storyName = 'Story' - self.authorName = 'Author' - self.outputName = 'Story' - self.storyDescription = 'A Story' + self.storyName = '' + self.authorName = '' + self.storyDescription = '' self.storyCharacters = [] self.storySeries = '' self.authorId = '0' @@ -55,23 +48,21 @@ class FPCom(FanfictionSiteAdapter): self.languageId = 'en-UK' self.language = 'English' self.subjects = [] - logging.debug('self.subjects=%s' % self.subjects) self.publisher = self.host self.numChapters = 0 self.numWords = 0 - self.genre = 'Fiction' - self.category = 'Category' + self.genre = '' + self.category = '' self.storyStatus = 'In-Progress' self.storyRating = 'K' self.storyUserRating = '0' + self.outputName = '' + self.outputStorySep = '-fpcom_' - logging.debug('self.path=%s' % self.path) - if self.path.startswith('/'): self.path = self.path[1:] spl = self.path.split('/') - logging.debug('spl=%s' % spl) if spl is not None: if len(spl) > 0 and spl[0] != 's': logging.error("Error URL \"%s\" is not a story." % self.url) @@ -84,33 +75,22 @@ class FPCom(FanfictionSiteAdapter): chapter = '1' if len(spl) == 5: self.path = "/".join(spl[1:-1]) - self.outputName = spl[4] + '-ffnet_' + spl[2] if self.path.endswith('/'): self.path = self.path[:-1] logging.debug('self.path=%s' % self.path) - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) - - logging.debug('self.storyId=%s, chapter=%s, self.outputName=%s' % (self.storyId, chapter, self.outputName)) - if not appEngine: + if not self.appEngine: self.opener = u2.build_opener(u2.HTTPCookieProcessor()) else: self.opener = None - logging.debug("Created FF.Net: url=%s" % (self.url)) + logging.debug("Created FP.Com: url=%s" % (self.url)) def _getLoginScript(self): return self.path - def requiresLogin(self, url = None): - return False - - def performLogin(self, url = None): - return True - def _getVarValue(self, varstr): #logging.debug('_getVarValue varstr=%s' % varstr) vals = varstr.split('=') @@ -125,7 +105,7 @@ class FPCom(FanfictionSiteAdapter): def _splitCrossover(self, subject): if "Crossover" in subject: - self._addSubject ("Crossover") + self.addSubject ("Crossover") logging.debug('Crossover=%s' % subject) if subject.find(' and ') != -1: words = subject.split(' ') @@ -134,18 +114,18 @@ class FPCom(FanfictionSiteAdapter): for s in words: if s in "and Crossover": if len(subj) > 0: - self._addSubject(subj) + self.addSubject(subj) subj = '' else: if len(subj) > 0: subj = subj + ' ' subj = subj + s if len(subj) > 0: - self._addSubject(subj) + self.addSubject(subj) else: - self._addSubject(subject) + self.addSubject(subject) else: - self._addSubject(subject) + self.addSubject(subject) return True def _splitGenre(self, subject): @@ -154,28 +134,11 @@ class FPCom(FanfictionSiteAdapter): logging.debug('words=%s' % words) for subj in words: if len(subj) > 0: - self._addSubject(subj) - return True - - def _addSubject(self, subject): - subj = subject.upper() - for s in self.subjects: - if s.upper() == subj: - return False - - self.subjects.append(subject) - return True - - def _addCharacter(self, character): - chara = character.upper() - for c in self.storyCharacters: - if c.upper() == chara: - return False - self.storyCharacters.append(character) + self.addSubject(subj) return True def _fetchUrl(self, url): - if not appEngine: + if not self.appEngine: return self.opener.open(url).read().decode('utf-8') else: return googlefetch(url).content @@ -206,8 +169,7 @@ class FPCom(FanfictionSiteAdapter): if tt is not None: if len(tt) > 0: self.storyName = tt[0] - self.outputName = self.storyName.replace(" ", "_") + '-fpcom_' + self.storyId - logging.debug('self.storyId=%s, self.storyName=%s, self.outputName=%s' % (self.storyId, self.storyName, self.outputName)) + logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName)) if len(tt) > 1: tt1 = tt[1].split(' - ') if tt1 is not None and len(tt1) > 0: @@ -217,31 +179,33 @@ class FPCom(FanfictionSiteAdapter): for cc1 in cc: if cc1 is not None and cc1 != 'a': if cc1 == 'fanfic': - self._addSubject('FanFiction') + self.addSubject('FanFiction') else: - self._addSubject(cc1) + self.addSubject(cc1) logging.debug('self.subjects=%s' % self.subjects) + numchapters = 0 + urlstory = '' + fidochap = soup.find('form', {'name':'fidochap'}) sl = fidochap.find('select', {'title':'chapter navigation'}) - logging.debug('sl=%s' % sl ) - urlstory = '' - if 'onchange' in sl._getAttrMap(): - ocs = sl['onchange'].split('\'') - logging.debug('ocs=%s' % ocs) - if ocs is not None and len(ocs) > 3: - urlstory = ocs[3] - logging.debug('urlstory=%s' % urlstory) - - numchapters = 0 - opts = sl.findAll('option') - for o in opts: - if 'value' in o._getAttrMap(): - url = 'http://' + self.host + '/s/' + self.storyId + '/' + o['value'] + urlstory - logging.debug('URL=%s, Title=%s' % (url, o.string)) - urls.append((url, o.string)) - numchapters = numchapters + 1 + if sl is not None: + logging.debug('sl=%s' % sl ) + if 'onchange' in sl._getAttrMap(): + ocs = sl['onchange'].split('\'') + logging.debug('ocs=%s' % ocs) + if ocs is not None and len(ocs) > 3: + urlstory = ocs[3] + logging.debug('urlstory=%s' % urlstory) + + opts = sl.findAll('option') + for o in opts: + if 'value' in o._getAttrMap(): + url = 'http://' + self.host + '/s/' + self.storyId + '/' + o['value'] + urlstory + logging.debug('URL=%s, Title=%s' % (url, o.string)) + urls.append((url, o.string)) + numchapters = numchapters + 1 if numchapters == 0: numchapters = 1 @@ -265,7 +229,7 @@ class FPCom(FanfictionSiteAdapter): if len(self.genre) > 0: self.genre = self.genre + ', ' self.genre = self.genre + ss - self._addSubject(ss) + self.addSubject(ss) logging.debug('self.genre=%s' % self.genre) logging.debug('self.subjects=%s' % self.subjects) tda = td.find ('a') @@ -290,7 +254,7 @@ class FPCom(FanfictionSiteAdapter): logging.debug('self.category=%s' % self.category) sgs = self.category.split('/') for sg in sgs: - self._addSubject(sg) + self.addSubject(sg) logging.debug('self.subjects=%s' % self.subjects) if lls > 3 and ss[3].strip() == 'Reviews:' and ll > 4: reviews = tdas[4] @@ -301,23 +265,19 @@ class FPCom(FanfictionSiteAdapter): lls = len(ss) if lls > 1: sds = ss[1].split(': ') - logging.debug('sds=%s' % sds) if sds is not None and len(sds) > 1 and sds[0] == 'Published': self.storyPublished = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) logging.debug('self.storyPublished=%s' % self.storyPublished) lls = len(ss) if lls > 2: sds = ss[2].split(': ') - logging.debug('sds=%s' % sds) if sds is not None and len(sds) > 1 and sds[0] == 'Updated': self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) logging.debug('self.storyUpdated=%s' % self.storyUpdated) - self.uuid = 'urn:uuid:' + self.host + '-a.' + self.authorId + '-s.' + self.storyId self.authorURL = 'http://' + self.host + '/u/' + self.authorId - logging.debug('self.uuid=%s' % self.uuid) return urls @@ -348,154 +308,15 @@ class FPCom(FanfictionSiteAdapter): return div.__str__('utf8') - def setLogin(self, login): - self.login = login - - def setPassword(self, password): - self.password = password - - def getHost(self): - logging.debug('self.host=%s' % self.host) - return self.host - - def getStoryURL(self): - logging.debug('self.url=%s' % self.url) - return self.url - - def getUUID(self): - logging.debug('self.uuid=%s' % self.uuid) - return self.uuid - - def getOutputName(self): - logging.debug('self.storyId=%s, self.storyName=%s self.outputName=%s' % (self.storyId, self.storyName, self.outputName)) - return self.outputName - - def getAuthorName(self): - logging.debug('self.authorName=%s' % self.authorName) - return self.authorName - - def getAuthorId(self): - logging.debug('self.authorId=%s' % self.authorId) - return self.authorId - - def getAuthorURL(self): - logging.debug('self.authorURL=%s' % self.authorURL) - return self.authorURL - - def getStoryId(self): - logging.debug('self.storyId=%s' % self.storyId) - return self.storyId - - def getStoryName(self): - logging.debug('self.storyName=%s' % self.storyName) - return self.storyName - - def getStoryDescription(self): - logging.debug('self.storyDescription=%s' % self.storyDescription) - return self.storyDescription - - def getStoryPublished(self): - logging.debug('self.storyPublished=%s' % self.storyPublished) - return self.storyPublished - - def getStoryCreated(self): - self.storyCreated = datetime.datetime.now() - logging.debug('self.storyCreated=%s' % self.storyCreated) - return self.storyCreated - - def getStoryUpdated(self): - logging.debug('self.storyUpdated=%s' % self.storyUpdated) - return self.storyUpdated - - def getLanguage(self): - logging.debug('self.language=%s' % self.language) - return self.language - - def getLanguageId(self): - logging.debug('self.languageId=%s' % self.languageId) - return self.languageId - - def getSubjects(self): - logging.debug('self.subjects=%s' % self.authorName) - return self.subjects - - def getPublisher(self): - logging.debug('self.publisher=%s' % self.publisher) - return self.publisher - - def getNumChapters(self): - logging.debug('self.numChapters=%s' % self.numChapters) - return self.numChapters - - def getNumWords(self): - logging.debug('self.numWords=%s' % self.numWords) - return self.numWords - - def getCategory(self): - logging.debug('self.category=%s' % self.category) - return self.category - - def getGenre(self): - logging.debug('self.genre=%s' % self.genre) - return self.genre - - def getStoryStatus(self): - logging.debug('self.storyStatus=%s' % self.storyStatus) - return self.storyStatus - - def getStoryRating(self): - logging.debug('self.storyRating=%s' % self.storyRating) - return self.storyRating - - def getStoryUserRating(self): - logging.debug('self.storyUserRating=%s' % self.storyUserRating) - return self.storyUserRating - - def getPrintableUrl(self, url): - pass - - def getStoryCharacters(self): - logging.debug('self.storyCharacters=%s' % self.storyCharacters) - return self.storyCharacters - - def getStorySeries(self): - logging.debug('self.storySeries=%s' % self.storySeries) - return self.storySeries -class FFA_UnitTests(unittest.TestCase): +class FPC_UnitTests(unittest.TestCase): def setUp(self): logging.basicConfig(level=logging.DEBUG) pass - def testChaptersAuthStory(self): - f = FFNet('http://www.fanfiction.net/s/5257563/1') - f.extractIndividualUrls() - - self.assertEquals('Beka0502', f.getAuthorName()) - self.assertEquals("Draco's Redemption", f.getStoryName()) - - def testChaptersCountNames(self): - f = FFNet('http://www.fanfiction.net/s/5257563/1') - urls = f.extractIndividualUrls() - - self.assertEquals(10, len(urls)) - - def testGetText(self): - url = 'http://www.fanfiction.net/s/5257563/1' - f = FFNet(url) - text = f.getText(url) - self.assertTrue(text.find('He was just about to look at some photos when he heard a crack') != -1) - - def testBrokenWands(self): - url = 'http://www.fanfiction.net/s/1527263/30/Harry_Potter_and_Broken_Wands' - f = FFNet(url) - text = f.getText(url) - - urls = f.extractIndividualUrls() - def testFictionPress(self): url = 'http://www.fictionpress.com/s/2725180/1/Behind_This_Facade' - f = FFNet(url) + f = FPCom(url) urls = f.extractIndividualUrls() self.assertEquals('Behind This Facade', f.getStoryName()) @@ -503,5 +324,6 @@ class FFA_UnitTests(unittest.TestCase): text = f.getText(url) self.assertTrue(text.find('Kale Resgerald at your service" He answered, "So, can we go now? Or do you want to') != -1) + if __name__ == '__main__': unittest.main() diff --git a/hpfiction.py b/hpfiction.py index 1bf46f55..38509391 100644 --- a/hpfiction.py +++ b/hpfiction.py @@ -75,36 +75,14 @@ class HPFiction(FanfictionSiteAdapter): self.storyUserRating = '0' self.storyCharacters = [] self.storySeries = '' - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) + self.outputName = '' + self.outputStorySep = '-hp_' logging.debug("Created HPFiction: url=%s" % (self.url)) def _getLoginScript(self): return self.path - - def requiresLogin(self, url = None): - return False - - def performLogin(self, url = None): - return True - def _addSubject(self, subject): - subj = subject.upper() - for s in self.subjects: - if s.upper() == subj: - return False - self.subjects.append(subject) - return True - - def _addCharacter(self, character): - chara = character.upper() - for c in self.storyCharacters: - if c.upper() == chara: - return False - self.storyCharacters.append(character) - return True - def extractIndividualUrls(self): data = self.opener.open(self.url).read() @@ -182,7 +160,7 @@ class HPFiction(FanfictionSiteAdapter): elif s[ii] == 'Characters:': s2 = s[ii+1].split(', ') for ss2 in s2: - self._addCharacter(ss2) + self.addCharacter(ss2) logging.debug('self.storyCharacters=%s' % self.storyCharacters) ii = ii + 2 elif s[ii] == 'Genre(s):': @@ -190,7 +168,7 @@ class HPFiction(FanfictionSiteAdapter): logging.debug('self.genre=%s' % self.genre) s2 = s[ii+1].split(', ') for ss2 in s2: - self._addSubject(ss2) + self.addSubject(ss2) logging.debug('self.subjects=%s' % self.subjects) ii = ii + 2 elif s[ii] == 'Status:': @@ -222,7 +200,6 @@ class HPFiction(FanfictionSiteAdapter): logging.debug('self.storyDescription=%s' % self.storyDescription) urls = [] - self.outputName = self.storyName.replace(" ", "_") + '-hp_' + self.storyId select = soup.find('select', {'name' : 'chapterid'}) if select is None: @@ -239,24 +216,8 @@ class HPFiction(FanfictionSiteAdapter): if title != "Story Index": urls.append((url,title)) - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) - return urls - def getHost(self): - logging.debug('self.host=%s' % self.host) - return self.host - - def getStoryName(self): - return self.storyName - - def getOutputName(self): - return self.outputName - - def getAuthorName(self): - return self.authorName - def getText(self, url): logging.debug('Downloading from URL: %s' % url) data = self.opener.open(url).read() @@ -267,96 +228,6 @@ class HPFiction(FanfictionSiteAdapter): exit(20) return divtext.__str__('utf8') - def getAuthorId(self): - logging.debug('self.authorId=%s' % self.authorId) - return self.authorId - - def getStoryId(self): - logging.debug('self.storyId=%s' % self.storyId) - return self.storyId - - def getStoryDescription(self): - logging.debug('self.storyDescription=%s' % self.storyDescription) - return self.storyDescription - - def getStoryPublished(self): - logging.debug('self.storyPublished=%s' % self.storyPublished) - return self.storyPublished - - def getStoryCreated(self): - self.storyCreated = datetime.datetime.now() - logging.debug('self.storyCreated=%s' % self.storyCreated) - return self.storyCreated - - def getStoryUpdated(self): - logging.debug('self.storyUpdated=%s' % self.storyUpdated) - return self.storyUpdated - - def getLanguage(self): - logging.debug('self.language=%s' % self.language) - return self.language - - def getLanguageId(self): - logging.debug('self.languageId=%s' % self.languageId) - return self.languageId - - def getSubjects(self): - logging.debug('self.subjects=%s' % self.authorName) - return self.subjects - - def getPublisher(self): - logging.debug('self.publisher=%s' % self.publisher) - return self.publisher - - def getNumChapters(self): - logging.debug('self.numChapters=%s' % self.numChapters) - return self.numChapters - - def getNumWords(self): - logging.debug('self.numWords=%s' % self.numWords) - return self.numWords - - def getStoryURL(self): - logging.debug('self.url=%s' % self.url) - return self.url - - def getAuthorURL(self): - logging.debug('self.authorURL=%s' % self.authorURL) - return self.authorURL - - def getUUID(self): - logging.debug('self.uuid=%s' % self.uuid) - return self.uuid - - def getCategory(self): - logging.debug('self.category=%s' % self.category) - return self.category - - def getGenre(self): - logging.debug('self.genre=%s' % self.genre) - return self.genre - - def getStoryStatus(self): - logging.debug('self.storyStatus=%s' % self.storyStatus) - return self.storyStatus - - def getStoryRating(self): - logging.debug('self.storyRating=%s' % self.storyRating) - return self.storyRating - - def getStoryUserRating(self): - logging.debug('self.storyUserRating=%s' % self.storyUserRating) - return self.storyUserRating - - def getStoryCharacters(self): - logging.debug('self.storyCharacters=%s' % self.storyCharacters) - return self.storyCharacters - - def getStorySeries(self): - logging.debug('self.storySeries=%s' % self.storySeries) - return self.storySeries - - class FF_UnitTests(unittest.TestCase): def setUp(self): diff --git a/potionsNsnitches.py b/potionsNsnitches.py index 95ac435c..3868e934 100644 --- a/potionsNsnitches.py +++ b/potionsNsnitches.py @@ -53,84 +53,37 @@ class PotionsNSnitches(FanfictionSiteAdapter): self.storyUserRating = '0' self.storyCharacters = [] self.storySeries = '' - + self.outputName = '' + self.outputStorySep = '-pns_' + self.chapurl = False ss=self.url.split('?') - logging.debug('ss=%s' % ss) if ss is not None and len(ss) > 1: sss = ss[1].replace('&','&').split('&') - logging.debug('sss=%s' % sss) if sss is not None and len(sss) > 0: ssss = sss[0].split('=') - logging.debug('ssss=%s' % ssss) if ssss is not None and len(ssss) > 1 and ssss[0] == 'sid': self.storyId = ssss[1] if len(sss) > 1: ssss = sss[1].split('=') - logging.debug('ssss=%s' % ssss) if ssss is not None and len(ssss) > 1 and ssss[0] == 'chapter': self.chapurl = True self.url = 'http://' + self.host + '/' + self.path + '?sid=' + self.storyId logging.debug('self.url=%s' % self.url) - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) - logging.debug("Created PotionsNSnitches: url=%s" % (self.url)) - def requiresLogin(self, url = None): - # potionsandsnitches.net doesn't require login. - if self.host == 'potionsandsnitches.net': - return False - else: + def _getLoginScript(self): + return '/user.php?action=login' + + def reqLoginData(self, data): + if data.find('Registered Users Only. Please click OK to login or register.') != -1 or data.find('There is no such account on our website') != -1: return True - - def performLogin(self, url = None): - data = {} - - data['penname'] = self.login - data['password'] = self.password - data['cookiecheck'] = '1' - data['submit'] = 'Submit' - - urlvals = u.urlencode(data) - loginUrl = 'http://' + self.host + self._getLoginScript() - logging.debug("Will now login to URL %s" % loginUrl) - - req = self.opener.open(loginUrl, urlvals) - - d = req.read().decode('utf-8') - - if self.reqLoginData(d) : - return False else: - return True + return False - - def setLogin(self, login): - self.login = login - - def setPassword(self, password): - self.password = password - - def _addSubject(self, subject): - subj = subject.upper() - for s in self.subjects: - if s.upper() == subj: - return False - self.subjects.append(subject) - return True - - def _addCharacter(self, character): - chara = character.upper() - for c in self.storyCharacters: - if c.upper() == chara: - return False - self.storyCharacters.append(character) - return True - def _fillCharacters(self, strlist, idx, maxlen): ii = idx while ii < maxlen: @@ -139,7 +92,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): if chara.find(':') != -1: return (ii-1) elif chara.find(',') == -1: - self._addCharacter (chara) + self.addCharacter (chara) ii = ii + 1 return (ii) @@ -154,7 +107,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): elif genre.find(',') != -1: genre = ', ' else: - self._addSubject (genre) + self.addSubject (genre) self.genre = self.genre + genre ii = ii + 1 return (ii) @@ -170,7 +123,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): elif cat.find(',') != -1: cat = ', ' else: - self._addSubject (cat) + self.addSubject (cat) self.category = self.category + cat ii = ii + 1 return (ii) @@ -197,9 +150,8 @@ class PotionsNSnitches(FanfictionSiteAdapter): if ss is not None and len(ss) > 1: self.storyName = ss[0].strip() self.authorName = ss[1].strip() - self.outputName = self.storyName.replace(" ", "_") + '-pNs_' + self.storyId - logging.debug('self.storyId=%s, self.storyName=%s, self.outputName=%s' % (self.storyId, self.storyName, self.outputName)) + logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName)) logging.debug('self.authorId=%s, self.authorName=%s' % (self.authorId, self.authorName)) select = soup.find('select', { 'name' : 'chapter' } ) @@ -342,22 +294,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): logging.error('self.storyName is empty!! Exitting!') exit(1) - self.outputName = self.storyName.replace(" ", "_") + '-pNs_' + self.storyId - logging.debug('self.outputName=%s' % self.outputName) - - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) - return result - - def getStoryName(self): - return self.storyName - - def getOutputName(self): - return self.outputName - - def getAuthorName(self): - return self.authorName def getText(self, url): if url.find('http://') == -1: @@ -380,107 +317,6 @@ class PotionsNSnitches(FanfictionSiteAdapter): text = div.__str__('utf8').replace(' SOMETHING_BR ','
') return text - def _getLoginScript(self): - return '/user.php?action=login' - - def reqLoginData(self, data): - if data.find('Registered Users Only. Please click OK to login or register.') != -1 or data.find('There is no such account on our website') != -1: - return True - else: - return False - - def getHost(self): - logging.debug('self.host=%s' % self.host) - return self.host - - def getStoryURL(self): - logging.debug('self.url=%s' % self.url) - return self.url - - def getAuthorURL(self): - logging.debug('self.authorURL=%s' % self.authorURL) - return self.authorURL - - def getUUID(self): - logging.debug('self.uuid=%s' % self.uuid) - return self.uuid - - def getStoryDescription(self): - logging.debug('self.storyDescription=%s' % self.storyDescription) - return self.storyDescription - - def getStoryPublished(self): - logging.debug('self.storyPublished=%s' % self.storyPublished) - return self.storyPublished - - def getStoryCreated(self): - self.storyCreated = datetime.datetime.now() - logging.debug('self.storyCreated=%s' % self.storyCreated) - return self.storyCreated - - def getStoryUpdated(self): - logging.debug('self.storyUpdated=%s' % self.storyUpdated) - return self.storyUpdated - - def getLanguage(self): - logging.debug('self.language=%s' % self.language) - return self.language - - def getLanguageId(self): - logging.debug('self.languageId=%s' % self.languageId) - return self.languageId - - def getSubjects(self): - logging.debug('self.subjects=%s' % self.authorName) - return self.subjects - - def getPublisher(self): - logging.debug('self.publisher=%s' % self.publisher) - return self.publisher - - def getNumChapters(self): - logging.debug('self.numChapters=%s' % self.numChapters) - return self.numChapters - - def getNumWords(self): - logging.debug('self.numWords=%s' % self.numWords) - return self.numWords - - def getAuthorId(self): - logging.debug('self.authorId=%s' % self.authorId) - return self.authorId - - def getStoryId(self): - logging.debug('self.storyId=%s' % self.storyId) - return self.storyId - - def getCategory(self): - logging.debug('self.category=%s' % self.category) - return self.category - - def getGenre(self): - logging.debug('self.genre=%s' % self.genre) - return self.genre - - def getStoryStatus(self): - logging.debug('self.storyStatus=%s' % self.storyStatus) - return self.storyStatus - - def getStoryRating(self): - logging.debug('self.storyRating=%s' % self.storyRating) - return self.storyRating - - def getStoryUserRating(self): - logging.debug('self.storyUserRating=%s' % self.storyUserRating) - return self.storyUserRating - - def getStoryCharacters(self): - logging.debug('self.storyCharacters=%s' % self.storyCharacters) - return self.storyCharacters - - def getStorySeries(self): - logging.debug('self.storySeries=%s' % self.storySeries) - return self.storySeries class PotionsNSnitches_UnitTests(unittest.TestCase): def setUp(self): diff --git a/twilighted.py b/twilighted.py index d45e4688..ed293406 100644 --- a/twilighted.py +++ b/twilighted.py @@ -43,13 +43,15 @@ class Twilighted(FanfictionSiteAdapter): self.publisher = self.host self.numChapters = 0 self.numWords = 0 - self.genre = 'FanFiction' - self.category = 'Category' + self.genre = '' + self.category = 'Fanfiction' self.storyStatus = 'In-Progress' self.storyRating = 'PG' self.storyUserRating = '0' self.storyCharacters = [] self.storySeries = '' + self.outputName = '' + self.outputStorySep = '-tw_' self.chapurl = False ss=self.url.split('?') @@ -71,18 +73,19 @@ class Twilighted(FanfictionSiteAdapter): self.url = 'http://' + self.host + '/' + self.path + '?sid=' + self.storyId logging.debug('self.url=%s' % self.url) - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) - logging.debug("Created Twilighted: url=%s" % (self.url)) + def _getLoginScript(self): + return '/user.php?action=login' + + def reqLoginData(self, data): + if data.find('Registered Users Only. Please click OK to login or register.') != -1 or data.find('There is no such account on our website') != -1: + return True + else: + return False def requiresLogin(self, url = None): - # potionsandsnitches.net doesn't require login. - if self.host == 'potionsandsnitches.net': - return False - else: - return True + return True def performLogin(self, url = None): data = {} @@ -105,29 +108,6 @@ class Twilighted(FanfictionSiteAdapter): else: return True - - def setLogin(self, login): - self.login = login - - def setPassword(self, password): - self.password = password - - def _addSubject(self, subject): - subj = subject.upper() - for s in self.subjects: - if s.upper() == subj: - return False - self.subjects.append(subject) - return True - - def _addCharacter(self, character): - chara = character.upper() - for c in self.storyCharacters: - if c.upper() == chara: - return False - self.storyCharacters.append(character) - return True - def extractIndividualUrls(self): url = self.url + '&chapter=1' data = self.opener.open(url).read() @@ -144,9 +124,8 @@ class Twilighted(FanfictionSiteAdapter): logging.debug('Title: %s' % title) self.storyName = title.split(' by ')[0].strip() self.authorName = title.split(' by ')[1].strip() - self.outputName = self.storyName.replace(" ", "_") + '-tw_' + self.storyId - logging.debug('self.storyId=%s, self.storyName=%s, self.outputName=%s' % (self.storyId, self.storyName, self.outputName)) + logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName)) logging.debug('self.authorId=%s, self.authorName=%s' % (self.authorId, self.authorName)) select = soup.find('select', { 'name' : 'chapter' } ) @@ -192,8 +171,6 @@ class Twilighted(FanfictionSiteAdapter): str1 = a.string (vs, self.storyId) = a['href'].split('=') logging.debug('self.storyId=%s self.storyName=%s' % (self.storyId, self.storyName)) - self.outputName = self.outputName + "-tw_" + self.storyId - logging.debug('self.outputName=%s' % self.outputName) if a['href'].find('viewuser.php?uid=') != -1: str1 = a.string (vs, self.authorId) = a['href'].split('=') @@ -217,16 +194,19 @@ class Twilighted(FanfictionSiteAdapter): ii = ii+1 while stlen > ii and len(strs[ii]) != 0 and strs[ii].find(':') == -1: if strs[ii] != ' ' and strs[ii] != ', ': - if self.category == 'Category': + if len(self.genre) > 0: + self.genre = self.genre + ', ' + self.genre = strs[ii].strip(' ') + if len(self.category) == 0: self.category = strs[ii].strip(' ') - self._addSubject(strs[ii].strip(' ')) + self.addSubject(strs[ii].strip(' ')) ii = ii+1 logging.debug('self.subjects=%s' % self.subjects) if strs[ii] == 'Characters: ': ii = ii+1 while stlen > ii and len(strs[ii]) != 0 and strs[ii].find(':') == -1: if strs[ii] != ' ' and strs[ii] != ', ': - self._addCharacter(strs[ii].strip(' ')) + self.addCharacter(strs[ii].strip(' ')) ii = ii+1 logging.debug('self.storyCharacters=%s' % self.storyCharacters) elif strs[ii] == 'Completed:': @@ -266,20 +246,8 @@ class Twilighted(FanfictionSiteAdapter): logging.debug('Skipped Label \"%s\" Value \"%s\"' % (strs[ii], strs[ii+1])) ii = ii+2 - self.uuid = 'urn:uuid:' + self.host + '-u.' + self.authorId + '-s.' + self.storyId - logging.debug('self.uuid=%s' % self.uuid) - return result - def getStoryName(self): - return self.storyName - - def getOutputName(self): - return self.outputName - - def getAuthorName(self): - return self.authorName - def getText(self, url): if url.find('http://') == -1: url = 'http://' + self.host + '/' + url @@ -297,107 +265,6 @@ class Twilighted(FanfictionSiteAdapter): return div.__str__('utf8') - def _getLoginScript(self): - return '/user.php?action=login' - - def reqLoginData(self, data): - if data.find('Registered Users Only. Please click OK to login or register.') != -1 or data.find('There is no such account on our website') != -1: - return True - else: - return False - - def getHost(self): - logging.debug('self.host=%s' % self.host) - return self.host - - def getStoryURL(self): - logging.debug('self.url=%s' % self.url) - return self.url - - def getAuthorURL(self): - logging.debug('self.authorURL=%s' % self.authorURL) - return self.authorURL - - def getUUID(self): - logging.debug('self.uuid=%s' % self.uuid) - return self.uuid - - def getStoryDescription(self): - logging.debug('self.storyDescription=%s' % self.storyDescription) - return self.storyDescription - - def getStoryPublished(self): - logging.debug('self.storyPublished=%s' % self.storyPublished) - return self.storyPublished - - def getStoryCreated(self): - self.storyCreated = datetime.datetime.now() - logging.debug('self.storyCreated=%s' % self.storyCreated) - return self.storyCreated - - def getStoryUpdated(self): - logging.debug('self.storyUpdated=%s' % self.storyUpdated) - return self.storyUpdated - - def getLanguage(self): - logging.debug('self.language=%s' % self.language) - return self.language - - def getLanguageId(self): - logging.debug('self.languageId=%s' % self.languageId) - return self.languageId - - def getSubjects(self): - logging.debug('self.subjects=%s' % self.authorName) - return self.subjects - - def getPublisher(self): - logging.debug('self.publisher=%s' % self.publisher) - return self.publisher - - def getNumChapters(self): - logging.debug('self.numChapters=%s' % self.numChapters) - return self.numChapters - - def getNumWords(self): - logging.debug('self.numWords=%s' % self.numWords) - return self.numWords - - def getAuthorId(self): - logging.debug('self.authorId=%s' % self.authorId) - return self.authorId - - def getStoryId(self): - logging.debug('self.storyId=%s' % self.storyId) - return self.storyId - - def getCategory(self): - logging.debug('self.category=%s' % self.category) - return self.category - - def getGenre(self): - logging.debug('self.genre=%s' % self.genre) - return self.genre - - def getStoryStatus(self): - logging.debug('self.storyStatus=%s' % self.storyStatus) - return self.storyStatus - - def getStoryRating(self): - logging.debug('self.storyRating=%s' % self.storyRating) - return self.storyRating - - def getStoryUserRating(self): - logging.debug('self.storyUserRating=%s' % self.storyUserRating) - return self.storyUserRating - - def getStoryCharacters(self): - logging.debug('self.storyCharacters=%s' % self.storyCharacters) - return self.storyCharacters - - def getStorySeries(self): - logging.debug('self.storySeries=%s' % self.storySeries) - return self.storySeries class Twilighted_UnitTests(unittest.TestCase): def setUp(self):