import os import re import sys import cgi import uuid import shutil import base64 import os.path import urllib as u import pprint as pp import urllib2 as u2 import urlparse as up import BeautifulSoup as bs import htmlentitydefs as hdefs from constants import * from ficwad import * class FFA: storyName = None def __init__(self): self.grabUrl = re.compile('(\(.+?)\<') self.grabAuthor = re.compile('.+pemail.+\'(\w+)') def getPasswordLine(self): return ' 0: title = titles[0] print(title) (website, rest) = title.string.split('::') story_chapter = rest.split("-") story = story_chapter[0].strip() self.storyName = story return result def getStoryName(self): return self.storyName def getAuthorName(self): return self.authorName def getText(self, data, fetch = False): lines = data.split('\n') begin = self._findIndex(lines, '', 0)+1 if begin == 0: begiun = self._findIndex(lines, '

', 24) if begin == 0: print('BAD start') pp.pprint(lines) sys.abort() end = self._findIndex(lines, '

' % (begin, end)) return "\n".join(lines[begin:end]) class Downloader: login = None password = None url = None host = None first = None opener = None writer = None def __init__(self, url, login, password): self.login = login self.password = password self.url = url self.infoProvider = FicWad() #FFA() parse = up.urlparse(url) self.host = parse.scheme + '://' + parse.netloc self.first = parse.path; self.loginUrl = self.host + self.infoProvider.getLoginScript() self.opener = u2.build_opener(u2.HTTPCookieProcessor()) def _loginRequired(self): print('is login required?') resp = self.opener.open(self.url) data = resp.read() if data.find(self.infoProvider.getPasswordLine()) != -1: print('yep') return True else: print('nada') return False def _login(self): (login, data) = self.infoProvider.getLoginPasswordOthers() data[login['login']] = self.login data[login['password']] = self.password urlvals = u.urlencode(data) req = self.opener.open(self.loginUrl, urlvals) if req.read().find(self.infoProvider.getPasswordLine()) != -1: return False else: return True def _getContent(self, url): print("" % url) return self.opener.open(url).read() def download(self): first = self._getContent(self.host + self.first) urls = self.infoProvider.extractIndividualUrls(first, self.host, self.first) self.writer = EPubFanficWriter("books", self.infoProvider.getStoryName(), self.infoProvider.getAuthorName()) for u,n in urls: text = self.infoProvider.getText(self._getContent(self.host+"/"+u)) self.writer.writeChapter(n, text) self.writer.finalise() if __name__ == '__main__': f = Downloader(sys.argv[1], 'sigizmund', '***************') if f._loginRequired(): f._login() f.download()