diff --git a/downloader.py b/downloader.py index 3dfd7fc0..cb4ca7d4 100644 --- a/downloader.py +++ b/downloader.py @@ -109,6 +109,7 @@ if __name__ == '__main__': logging.basicConfig(level=logging.DEBUG) argvlen = len(sys.argv) url = None + bookFormat = 'epub' if argvlen > 1: url = sys.argv[1] if argvlen > 2: @@ -121,7 +122,7 @@ if __name__ == '__main__': if type(url) is unicode: print('URL is unicode') url = url.encode('latin1') - + url = url.strip() adapter = None writerClass = None diff --git a/ffnet.py b/ffnet.py index a6fee2dc..3963d529 100644 --- a/ffnet.py +++ b/ffnet.py @@ -154,7 +154,7 @@ class FFNet(FanfictionSiteAdapter): data = self.fetchUrl(self.url) except Exception, e: data = '' - logging.error("Caught an exception reading URL " + self.url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + self.url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + self.url + "!") @@ -178,16 +178,16 @@ class FFNet(FanfictionSiteAdapter): for l in lines: if l.find("»") != -1 and l.find('') != -1: s2 = bs.BeautifulStoneSoup(l) - self.storyName = str(s2.find('b').string) + self.storyName = unicode(s2.find('b').string) logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName)) elif l.find(" 2: keystr = br.contents[1] if keystr is not None: - strs = re.split ("<[^>]+>", str(keystr)) + strs = re.split ("<[^>]+>", unicode(keystr)) keystr='' for s in strs: keystr = keystr + s @@ -129,7 +129,7 @@ class FictionAlley(FanfictionSiteAdapter): data = self.opener.open(self.url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + self.url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + self.url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + self.url + "!") @@ -154,7 +154,7 @@ class FictionAlley(FanfictionSiteAdapter): #logging.debug('bca=%s' % bca ) if 'href' in bca._getAttrMap(): #logging.debug('bca.href=%s' % bca['href'] ) - url = str(bca['href']) + url = unicode(bca['href']) if url is not None and len(url) > 0: self.url = url logging.debug('self.url=%s' % self.url ) @@ -207,7 +207,7 @@ class FictionAlley(FanfictionSiteAdapter): if len(ss1) > 1 and ss1[0] == 'Rating': self.storyRating = ss1[1] logging.debug('self.storyRating=%s' % self.storyRating) - self.storyDescription = str(ss[1]).replace("
","").replace("
","").replace('\n','') + self.storyDescription = unicode(ss[1]).replace("
","").replace("
","").replace('\n','') logging.debug('self.storyDescription=%s' % self.storyDescription) for li in links: @@ -260,7 +260,7 @@ class FictionAlley(FanfictionSiteAdapter): data = self.opener.open(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) diff --git a/ficwad.py b/ficwad.py index a2aab789..13e7a45d 100644 --- a/ficwad.py +++ b/ficwad.py @@ -63,7 +63,7 @@ class FicWad(FanfictionSiteAdapter): data = u2.urlopen(self.url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + self.url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + self.url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + self.url + "!") @@ -99,19 +99,19 @@ class FicWad(FanfictionSiteAdapter): logging.debug('self.storyName=%s, self.storyId=%s' % (self.storyName, self.storyId)) author = soup.find('span', {'class' : 'author'}) - self.authorName = str(author.a.string) + self.authorName = unicode(author.a.string) (u0, u1,self.authorId) = author.a['href'].split('/') self.authorURL = 'http://' + self.host + author.a['href'] logging.debug('self.authorName=%s self.authorId=%s' % (self.authorName, self.authorId)) description = soup.find('blockquote', {'class' : 'summary'}) if description is not None: - self.storyDescription = str(description.p.string) + self.storyDescription = unicode(description.p.string) logging.debug('self.storyDescription=%s' % self.storyDescription) meta = soup.find('p', {'class' : 'meta'}) if meta is not None: - s = str(meta).replace('\n',' ').replace('\t','').split(' - ') + s = unicode(meta).replace('\n',' ').replace('\t','').split(' - ') #logging.debug('meta.s=%s' % s) for ss in s: s1 = ss.replace(' ','').split(':') @@ -128,7 +128,7 @@ class FicWad(FanfictionSiteAdapter): allAs = soup1.findAll('a') for a in allAs: if self.category == 'Category': - self.category = str(a.string) + self.category = unicode(a.string) logging.debug('self.category=%s' % self.category) self.addSubject(self.category) logging.debug('self.subjects=%s' % self.subjects) @@ -240,7 +240,7 @@ class FicWad(FanfictionSiteAdapter): data = u2.urlopen(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) diff --git a/fpcom.py b/fpcom.py index 3598c2ec..471c0b85 100644 --- a/fpcom.py +++ b/fpcom.py @@ -142,7 +142,7 @@ class FPCom(FanfictionSiteAdapter): data = self.fetchUrl(self.url) except Exception, e: data = '' - logging.error("Caught an exception reading URL " + self.url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + self.url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + self.url + "!") @@ -167,7 +167,7 @@ class FPCom(FanfictionSiteAdapter): if metas is not None: for meta in metas: if 'content' in meta._getAttrMap(): - self.storyDescription = str(meta['content']) + self.storyDescription = unicode(meta['content']) logging.debug('self.storyDescription=%s' % self.storyDescription) title=meta.find('title') @@ -220,7 +220,7 @@ class FPCom(FanfictionSiteAdapter): logging.debug('URL=%s, Title=%s' % (url, self.storyName)) urls.append((url, self.storyName)) - self.numChapters = str(numchapters) + self.numChapters = unicode(numchapters) logging.debug('self.numChapters=%s' % self.numChapters) logging.debug('urls=%s' % urls) @@ -241,7 +241,7 @@ class FPCom(FanfictionSiteAdapter): logging.debug('self.subjects=%s' % self.subjects) tda = td.find ('a') if tda is not None and tda.string.find('Rated:') != -1: - tdas = re.split ("<[^>]+>", str(td).replace('\n','').replace(' ',' ')) + tdas = re.split ("<[^>]+>", unicode(td).replace('\n','').replace(' ',' ')) if tdas is not None: ll = len(tdas) if ll > 2: @@ -295,7 +295,7 @@ class FPCom(FanfictionSiteAdapter): data = self.fetchUrl(url) except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) diff --git a/hpfiction.py b/hpfiction.py index b2116efe..aeda7d36 100644 --- a/hpfiction.py +++ b/hpfiction.py @@ -89,7 +89,7 @@ class HPFiction(FanfictionSiteAdapter): data = self.opener.open(self.url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + self.url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + self.url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + self.url + "!") @@ -145,7 +145,7 @@ class HPFiction(FanfictionSiteAdapter): (u1, self.authorId) = a['href'].split('=') logging.debug('self.authorName=%s, self.authorId=%s' % (self.authorName, self.authorId)) elif a['href'].find('chapterid=') != -1 and len(def_chapurl) == 0: - def_chapurl = 'http://' + self.host + '/viewstory.php' + str(a['href']) + def_chapurl = 'http://' + self.host + '/viewstory.php' + unicode(a['href']) def_chaptitle = a.string logging.debug('def_chapurl=%s, def_chaptitle=%s' % (def_chapurl, def_chaptitle)) @@ -154,7 +154,7 @@ class HPFiction(FanfictionSiteAdapter): tds = center.findAll ('td') if tds is not None and len(tds) > 0: for td in tds: - s = re.split ("<[^>]+>", str(td).replace('\n','').replace(' ',' ')) + s = re.split ("<[^>]+>", unicode(td).replace('\n','').replace(' ',' ')) ii = 0 ll = len(s) sss = '' @@ -236,7 +236,7 @@ class HPFiction(FanfictionSiteAdapter): data = self.opener.open(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) diff --git a/mediaminer.py b/mediaminer.py index 2b790f45..6a4c03a7 100644 --- a/mediaminer.py +++ b/mediaminer.py @@ -151,7 +151,7 @@ class MediaMiner(FanfictionSiteAdapter): data = self.fetchUrl(self.url) except Exception, e: data = None - logging.error("Caught an exception reading URL " + self.url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + self.url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + self.url + "!") @@ -170,7 +170,7 @@ class MediaMiner(FanfictionSiteAdapter): #logging.debug('td_ffh=%s' % td_ffh) if td_ffh is not None: #logging.debug('td_ffh.text=%s' % td_ffh.find(text=True)) - self.storyName = str(td_ffh.find(text=True)).strip() + self.storyName = unicode(td_ffh.find(text=True)).strip() logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName)) fft = td_ffh.find('font', {'class' : 'smtxt'}) #logging.debug('fft=%s' % fft) @@ -193,7 +193,7 @@ class MediaMiner(FanfictionSiteAdapter): if 'class' in td._getAttrMap() and td['class'] != 'smtxt': #logging.debug('td has class attribute but is not smtxt') continue - ss = str(td).replace('\n','').replace('\r','').replace(' ', ' ') + ss = unicode(td).replace('\n','').replace('\r','').replace(' ', ' ') #logging.debug('ss=%s' % ss) if len(ss) > 1 and (ss.find('Genre(s):') != -1 or ss.find('Type:') != -1): #logging.debug('ss=%s' % ss) @@ -204,7 +204,7 @@ class MediaMiner(FanfictionSiteAdapter): nvs = bs.NavigableString('') sst='' ssb = ssbs[bb] - ssbt = str(ssb.text).strip() + ssbt = unicode(ssb.text).strip() #logging.debug('ssb=%s' % ssb) #logging.debug('ssbt=%s' % ssbt) ssbn = ssb.nextSibling @@ -212,7 +212,7 @@ class MediaMiner(FanfictionSiteAdapter): #logging.debug('ssbn=%s' % ssbn) #logging.debug('ssbn.class=%s' % ssbn.__class__) if nvs.__class__ == ssbn.__class__: - st = str(ssbn) + st = unicode(ssbn) if st.strip() != '|': sst = sst + st else: @@ -323,7 +323,7 @@ class MediaMiner(FanfictionSiteAdapter): logging.debug('URL=%s, Title=%s' % (url, ssd)) urls.append((url, ssd)) - self.numChapters = str(numchapters) + self.numChapters = unicode(numchapters) logging.debug('self.numChapters=%s' % self.numChapters) #logging.debug('urls=%s' % urls) @@ -337,7 +337,7 @@ class MediaMiner(FanfictionSiteAdapter): data = self.fetchUrl(url) except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) @@ -359,7 +359,7 @@ class MediaMiner(FanfictionSiteAdapter): while nxta is not None and not done: #logging.debug('nxta=%s' % nxta) #logging.debug('nxta.class=%s' % nxta.__class__) - st = str(nxta) + st = unicode(nxta) if nvs.__class__ != nxta.__class__: #logging.debug('nxta.name=%s' % nxta.name) if nxta.name == 'table': diff --git a/output.py b/output.py index 3f4189d5..af6788ce 100644 --- a/output.py +++ b/output.py @@ -269,9 +269,9 @@ class EPubFanficWriter(FanficWriter): description = self.adapter.getStoryDescription() if hasattr(description, "text"): - description = str(description.text) + description = unicode(description.text) else: - description = str(description) + description = unicode(description) if description is not None and len(description) > 0: description = description.replace ('\\\'', '\'').replace('\\\"', '\"') description = removeEntities(description) @@ -289,7 +289,7 @@ class EPubFanficWriter(FanficWriter): self._writeFile(titleFilePath, TITLE_ENTRY % ('Packaged:', createda)) tmpstr = self.adapter.getStoryRating() + " / " + self.adapter.getStoryUserRating() self._writeFile(titleFilePath, TITLE_ENTRY % ('Rating Age/User:', tmpstr)) - tmpstr = str(self.adapter.getNumChapters()) + " / " + str(self.adapter.getNumWords()) + tmpstr = unicode(self.adapter.getNumChapters()) + " / " + unicode(self.adapter.getNumWords()) self._writeFile(titleFilePath, TITLE_ENTRY % ('Chapters/Words:', tmpstr)) self._writeFile(titleFilePath, TITLE_ENTRY % ('Publisher:', self.adapter.getHost())) self._writeFile(titleFilePath, TITLE_ENTRY % ('Story ID:', self.adapter.getStoryId())) diff --git a/potionsNsnitches.py b/potionsNsnitches.py index d0c3fa53..43dc47a7 100644 --- a/potionsNsnitches.py +++ b/potionsNsnitches.py @@ -69,7 +69,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): if ssss is not None and len(ssss) > 1 and ssss[0] == 'chapter': self.chapurl = True - self.url = 'http://' + self.host + '/' + self.path + '?sid=' + self.storyId + self.url = 'http://' + self.host + self.path + '?sid=' + self.storyId logging.debug('self.url=%s' % self.url) logging.debug("Created PotionsNSnitches: url=%s" % (self.url)) @@ -135,7 +135,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): data = self.opener.open(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + url + "!") @@ -147,7 +147,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): data = self.opener.open(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + url + "!") @@ -211,9 +211,9 @@ class PotionsNSnitches(FanfictionSiteAdapter): logging.debug('self.authorName=%s, self.authorId=%s' % (self.authorName, self.authorId)) output = soup.find('div', {'id' : 'output'}) - #logging.debug('output: %s' % str(output)) - if output is not None and len(str(output)) > 1: - s2 = re.split ('<[^>]+>', str(output)) + #logging.debug('output: %s' % unicode(output)) + if output is not None and len(unicode(output)) > 1: + s2 = re.split ('<[^>]+>', unicode(output)) #logging.debug('s2=%s' % s2) ii = 0 ll = len(s2) @@ -229,8 +229,8 @@ class PotionsNSnitches(FanfictionSiteAdapter): cnttd = cnt.findAll('td') #logging.debug('cnttd: %s' % cnttd) for td in cnttd: - #logging.debug('td: %s' % str(td)) - ss = str(td).replace('\n','').replace('\r','').replace(' ', ' ') + #logging.debug('td: %s' % unicode(td)) + ss = unicode(td).replace('\n','').replace('\r','').replace(' ', ' ') if len(ss) > 1: s2 = re.split ('<[^>]+>', ss) #logging.debug('s2=%s' % s2) @@ -288,7 +288,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): tls = soup.findAll('div', {'style' : 'text-align: center;'}) for tl in tls: #logging.debug('tl: %s' % tl) - ss = str(tl).replace('\n','').replace('\r','').replace(' ', ' ') + ss = unicode(tl).replace('\n','').replace('\r','').replace(' ', ' ') if ss.find('Published:') != -1: s2 = re.split ('<[^>]+>', ss) #logging.debug('s2: %s' % s2) @@ -327,7 +327,7 @@ class PotionsNSnitches(FanfictionSiteAdapter): data = self.opener.open(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) diff --git a/twilighted.py b/twilighted.py index 00f59364..f3b72e3c 100644 --- a/twilighted.py +++ b/twilighted.py @@ -116,7 +116,7 @@ class Twilighted(FanfictionSiteAdapter): data = self.opener.open(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + url + "!") @@ -128,7 +128,7 @@ class Twilighted(FanfictionSiteAdapter): data = self.opener.open(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise StoryDoesNotExist("Problem reading story URL " + url + "!") @@ -280,7 +280,7 @@ class Twilighted(FanfictionSiteAdapter): data = self.opener.open(url).read() except Exception, e: data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + str(e) + ".") + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") if data is None: raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url)