From 5143a2187a08d3481d4e7132c33d2a66020d03b9 Mon Sep 17 00:00:00 2001 From: sigizmund Date: Fri, 18 Dec 2009 18:55:57 +0000 Subject: [PATCH] fictionpress.com support added --- constants.py | 4 ++-- downaloder.py | 5 ++++- ffnet.py | 21 ++++++++++++++++++--- output.py | 2 +- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/constants.py b/constants.py index 27515658..6af00eb6 100644 --- a/constants.py +++ b/constants.py @@ -102,7 +102,7 @@ XHTML_END = ''' acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', 'blockquote', 'br', 'center', 'cite', 'code', 'col', - 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', + 'colgroup', 'dd', 'del', 'dfn', 'dir', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'i', 'ins', 'kbd', 'label', 'li', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', @@ -110,7 +110,7 @@ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', acceptable_attributes = ['href'] -entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'', '‘' : '\'', '"' : '"' } +entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'', '‘' : '\'', '"' : '"', '…' : '...' } FB2_PROLOGUE = '' FB2_DESCRIPTION = ''' diff --git a/downaloder.py b/downaloder.py index 34b369db..fa9d1466 100644 --- a/downaloder.py +++ b/downaloder.py @@ -17,6 +17,7 @@ import ffnet import ficwad import output import fictionalley +import hpfiction class FanficLoader: '''A controller class which handles the interaction between various specific downloaders and writers''' @@ -66,8 +67,10 @@ if __name__ == '__main__': sys.exit(0) elif url.find('ficwad') != -1: adapter = ficwad.FicWad(url) - elif url.find('fanfiction.net') != -1: + elif url.find('fanfiction.net') != -1 or url.find('fictionpress.com') != -1: adapter = ffnet.FFNet(url) + elif url.find('harrypotterfanfiction.com') != -1: + adapter = hpfiction.HPFiction(url) else: print >> sys.stderr, "Oi! I can haz not appropriate adapter for URL %s!" % url sys.exit(1) diff --git a/ffnet.py b/ffnet.py index 4af36792..ddb7e53b 100644 --- a/ffnet.py +++ b/ffnet.py @@ -77,11 +77,17 @@ class FFNet(FanfictionSiteAdapter): def extractIndividualUrls(self): data = self._fetchUrl(self.url) + d2 = re.sub('&\#[0-9]+;', ' ', data) + soup = bs.BeautifulStoneSoup(d2) + allA = soup.findAll('a') + for a in allA: + if 'href' in a._getAttrMap() and a['href'].find('/u/') != -1: + self.authorName = a.string urls = [] lines = data.split('\n') for l in lines: - if l.find("") != -1: + if l.find("»") != -1 and l.find('') != -1: s2 = bs.BeautifulStoneSoup(l) self.storyName = s2.find('b').string elif l.find("