mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
somemerges
This commit is contained in:
commit
c123e77221
4 changed files with 25 additions and 7 deletions
|
|
@ -102,7 +102,7 @@ XHTML_END = '''</div>
|
||||||
|
|
||||||
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
|
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
|
||||||
'blockquote', 'br', 'center', 'cite', 'code', 'col',
|
'blockquote', 'br', 'center', 'cite', 'code', 'col',
|
||||||
'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em',
|
'colgroup', 'dd', 'del', 'dfn', 'dir', 'dl', 'dt', 'em',
|
||||||
'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'i',
|
'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'i',
|
||||||
'ins', 'kbd', 'label', 'li', 'ol',
|
'ins', 'kbd', 'label', 'li', 'ol',
|
||||||
'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike',
|
'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike',
|
||||||
|
|
@ -110,7 +110,7 @@ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
|
||||||
|
|
||||||
acceptable_attributes = ['href']
|
acceptable_attributes = ['href']
|
||||||
|
|
||||||
entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'', '‘' : '\'', '"' : '"' }
|
entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'', '‘' : '\'', '"' : '"', '…' : '...' }
|
||||||
|
|
||||||
FB2_PROLOGUE = '<FictionBook>'
|
FB2_PROLOGUE = '<FictionBook>'
|
||||||
FB2_DESCRIPTION = '''<description>
|
FB2_DESCRIPTION = '''<description>
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ import ffnet
|
||||||
import ficwad
|
import ficwad
|
||||||
import output
|
import output
|
||||||
import fictionalley
|
import fictionalley
|
||||||
|
import hpfiction
|
||||||
|
|
||||||
class FanficLoader:
|
class FanficLoader:
|
||||||
'''A controller class which handles the interaction between various specific downloaders and writers'''
|
'''A controller class which handles the interaction between various specific downloaders and writers'''
|
||||||
|
|
@ -76,8 +77,10 @@ if __name__ == '__main__':
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
elif url.find('ficwad') != -1:
|
elif url.find('ficwad') != -1:
|
||||||
adapter = ficwad.FicWad(url)
|
adapter = ficwad.FicWad(url)
|
||||||
elif url.find('fanfiction.net') != -1:
|
elif url.find('fanfiction.net') != -1 or url.find('fictionpress.com') != -1:
|
||||||
adapter = ffnet.FFNet(url)
|
adapter = ffnet.FFNet(url)
|
||||||
|
elif url.find('harrypotterfanfiction.com') != -1:
|
||||||
|
adapter = hpfiction.HPFiction(url)
|
||||||
else:
|
else:
|
||||||
print >> sys.stderr, "Oi! I can haz not appropriate adapter for URL %s!" % url
|
print >> sys.stderr, "Oi! I can haz not appropriate adapter for URL %s!" % url
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
|
||||||
21
ffnet.py
21
ffnet.py
|
|
@ -80,11 +80,17 @@ class FFNet(FanfictionSiteAdapter):
|
||||||
|
|
||||||
def extractIndividualUrls(self):
|
def extractIndividualUrls(self):
|
||||||
data = self._fetchUrl(self.url)
|
data = self._fetchUrl(self.url)
|
||||||
|
d2 = re.sub('&\#[0-9]+;', ' ', data)
|
||||||
|
soup = bs.BeautifulStoneSoup(d2)
|
||||||
|
allA = soup.findAll('a')
|
||||||
|
for a in allA:
|
||||||
|
if 'href' in a._getAttrMap() and a['href'].find('/u/') != -1:
|
||||||
|
self.authorName = a.string
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
lines = data.split('\n')
|
lines = data.split('\n')
|
||||||
for l in lines:
|
for l in lines:
|
||||||
if l.find("<img src='http://c.fanfiction.net/static/ficons/script.png' width=16 height=16 border=0 align=absmiddle>") != -1:
|
if l.find("»") != -1 and l.find('<b>') != -1:
|
||||||
s2 = bs.BeautifulStoneSoup(l)
|
s2 = bs.BeautifulStoneSoup(l)
|
||||||
self.storyName = s2.find('b').string
|
self.storyName = s2.find('b').string
|
||||||
elif l.find("<a href='/u/") != -1:
|
elif l.find("<a href='/u/") != -1:
|
||||||
|
|
@ -98,7 +104,7 @@ class FFNet(FanfictionSiteAdapter):
|
||||||
s2 = bs.BeautifulSoup(u)
|
s2 = bs.BeautifulSoup(u)
|
||||||
options = s2.findAll('option')
|
options = s2.findAll('option')
|
||||||
for o in options:
|
for o in options:
|
||||||
url = 'http://fanfiction.net/s/' + self.storyId + '/' + o['value']
|
url = 'http://' + self.host + '/s/' + self.storyId + '/' + o['value']
|
||||||
title = o.string
|
title = o.string
|
||||||
logging.debug('URL = `%s`, Title = `%s`' % (url, title))
|
logging.debug('URL = `%s`, Title = `%s`' % (url, title))
|
||||||
urls.append((url,title))
|
urls.append((url,title))
|
||||||
|
|
@ -156,7 +162,16 @@ class FFA_UnitTests(unittest.TestCase):
|
||||||
text = f.getText(url)
|
text = f.getText(url)
|
||||||
|
|
||||||
urls = f.extractIndividualUrls()
|
urls = f.extractIndividualUrls()
|
||||||
|
|
||||||
|
|
||||||
|
def testFictionPress(self):
|
||||||
|
url = 'http://www.fictionpress.com/s/2725180/1/Behind_This_Facade'
|
||||||
|
f = FFNet(url)
|
||||||
|
urls = f.extractIndividualUrls()
|
||||||
|
|
||||||
|
self.assertEquals('Behind This Facade', f.getStoryName())
|
||||||
|
self.assertEquals('IntoxicatingMelody', f.getAuthorName())
|
||||||
|
|
||||||
|
text = f.getText(url)
|
||||||
|
self.assertTrue(text.find('Kale Resgerald at your service" He answered, "So, can we go now? Or do you want to') != -1)
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
@ -167,7 +167,7 @@ class EPubFanficWriter(FanficWriter):
|
||||||
if p.string != None and (len(p.string.strip()) == 0 or p.string.strip() == ' ' ) :
|
if p.string != None and (len(p.string.strip()) == 0 or p.string.strip() == ' ' ) :
|
||||||
p.extract()
|
p.extract()
|
||||||
|
|
||||||
allBrs = self.soup.findAll(recursive=True, name = ["br", "hr"])
|
allBrs = self.soup.findAll(recursive=True, name = ["br", "hr", 'div'])
|
||||||
for br in allBrs:
|
for br in allBrs:
|
||||||
if (br.string != None and len(br.string.strip()) != 0) or (br.contents != None):
|
if (br.string != None and len(br.string.strip()) != 0) or (br.contents != None):
|
||||||
br.name = 'p'
|
br.name = 'p'
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue