mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Strip HTML from story titles in all the adapters that don't already.
This commit is contained in:
parent
b5d00951de
commit
31b7257496
75 changed files with 83 additions and 84 deletions
|
|
@ -90,7 +90,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php"))
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r"^/works/\d+$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
alist = soup.findAll('a', href=re.compile(r"^/users/\w+/pseuds/\w+"))
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('div', {'class':"story border"}).find('span',{'class':'left'})
|
||||
title=a.text.split('"')[1]
|
||||
title=stripHTML(a).split('"')[1]
|
||||
self.story.setMetadata('title',title)
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
|
||||
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
if a != None:
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
break
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ class Asr3SlashzoneOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -207,7 +207,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
|
|||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -143,11 +143,11 @@ class CheckmatedComAdapter(BaseSiteAdapter):
|
|||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.findAll('span', {'class' : 'storytitle'})
|
||||
self.story.setMetadata('title',a[0].string)
|
||||
a = soup.find('span', {'class' : 'storytitle'})
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = a[1].find('a', href=re.compile(r"authors.php\?name\=\w+"))
|
||||
a = a.parent.find('a', href=re.compile(r"authors.php\?name\=\w+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
aut.extract()
|
||||
|
||||
a = a.string[:(len(a.string)-4)]
|
||||
self.story.setMetadata('title',a)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
chapters = soup.find('select').findAll('option')
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ class DotMoonNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = body.find('b')
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url. http://www.dotmoon.net/board/index.php?action=profile;u=1'
|
||||
a = body.find('a', href=re.compile(r"index.php\?action=profile;u=\d+"))
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -154,7 +154,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ class EfictionEstelielDeAdapter(BaseSiteAdapter):
|
|||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
url=self.url
|
||||
logger.debug("Normalizing to URL: "+url)
|
||||
## title's right there...
|
||||
self.story.setMetadata('title',storya.string)
|
||||
self.story.setMetadata('title',stripHTML(storya))
|
||||
data = self._fetchUrl(url)
|
||||
soup = bs.BeautifulSoup(data)
|
||||
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
|
||||
|
|
@ -121,7 +121,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
## same way.
|
||||
chapsoup = bs.BeautifulSoup(chapterdata)
|
||||
storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
|
||||
self.story.setMetadata('title',storya.string)
|
||||
self.story.setMetadata('title',stripHTML(storya))
|
||||
del chapsoup
|
||||
|
||||
del chapterdata
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# title - first h4 tag will be title.
|
||||
titleh4 = soup.find('h4')
|
||||
self.story.setMetadata('title', titleh4.a.string)
|
||||
self.story.setMetadata('title', stripHTML(titleh4.a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"^/author/\d+"))
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'^/story/'+self.story.getMetadata('storyId')))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# self.story.setMetadata("title", storyMetadata["title"])
|
||||
# if not storyMetadata["title"]:
|
||||
|
|
|
|||
|
|
@ -142,7 +142,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"/a/\w+"))
|
||||
|
|
|
|||
|
|
@ -182,7 +182,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'\?psid='+self.story.getMetadata('storyId')))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
## javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid=290995'
|
||||
if "This story may contain adult themes." in a['href'] and not (self.is_adult or self.getConfig("is_adult")):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ class HennethAnnunNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('h2', {'id':'page_heading'})
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find the chapters: chapter_view.cfm?stid=6663&spordinal=1"
|
||||
for chapter in soup.findAll('a', href=re.compile(r'chapter_view.cfm\?stid='+self.story.getMetadata('storyId')+"&spordinal=\d+$")):
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('author',aut.string)
|
||||
aut.extract()
|
||||
|
||||
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
|
||||
self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)])
|
||||
|
||||
# Find the chapters:
|
||||
chapters=soup.find('select')
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
h = soup.find('a', id="blog_title")
|
||||
t = h.find('span')
|
||||
self.story.setMetadata('title',t.contents[0].string.strip())
|
||||
self.story.setMetadata('title',stripHTML(t.contents[0]).strip())
|
||||
|
||||
s = t.find('div')
|
||||
if s != None:
|
||||
|
|
@ -122,9 +122,6 @@ class InDeathNetAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorUrl',a['href'])
|
||||
self.story.setMetadata('author',m.group('name'))
|
||||
|
||||
|
||||
|
||||
|
||||
# Find the chapters:
|
||||
chapters=soup.findAll('a', title="View entry", href=re.compile(r'http://www.indeath.net/blog/'+self.story.getMetadata('storyId')+"/entry\-(\d+)\-([^/]*)/$"))
|
||||
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -107,10 +107,10 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
for font in title.findAll('font'):
|
||||
font.extract() # removes 'font' tags from inside the td.
|
||||
if title.has_key('colspan'):
|
||||
titlet = title.text
|
||||
titlet = stripHTML(title)
|
||||
else:
|
||||
## No colspan, it's part chapter title--even if it's a one-shot.
|
||||
titlet = ':'.join(title.text.split(':')[:-1]) # strip trailing 'Chapter X' or chapter title
|
||||
titlet = ':'.join(stripHTML(title).split(':')[:-1]) # strip trailing 'Chapter X' or chapter title
|
||||
self.story.setMetadata('title',titlet)
|
||||
## The story title is difficult to reliably parse from the
|
||||
## story pages. Getting it from the author page is, but costs
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('h1')
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"authorresults.php\?author=\d+"))
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('h1')
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"authorresults.php\?author=\d+"))
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ class NetRaptorOrgAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -156,7 +156,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
|
|||
a = info.find('a')
|
||||
if 'viewstory.php?sid='+self.story.getMetadata('storyId') == a['href'] or \
|
||||
('viewstory.php?sid='+self.story.getMetadata('storyId')+'&') in a['href']:
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
break
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ class NickAndGregNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
for info in asoup.findAll('table', {'class' : 'border'}):
|
||||
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
if a != None:
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
break
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
b = soup.find('div', {'id' : 'nav25'})
|
||||
a = b.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url. /fanfiction/stories.php?psid=125
|
||||
a = b.find('a', href=re.compile(r"/fanfiction/stories.php\?psid=\d+"))
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ class PommeDeSangComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile('viewstory.php\?sid=\d+'))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
|
|||
#find first metadata block
|
||||
mb = cell.div.findNextSibling('div')
|
||||
#Get meta...
|
||||
self.story.setMetadata('title', mb.b.string)
|
||||
self.story.setMetadata('title', stripHTML(mb.b))
|
||||
#strip out brackets on rating
|
||||
self.story.setMetadata('rating', mb.span.string[1:-1])
|
||||
#Completion status is denoted by the presence of this image:
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('author',aut.string)
|
||||
aut.extract()
|
||||
|
||||
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
|
||||
self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)])
|
||||
|
||||
# Find the chapters:
|
||||
chapters=soup.find('select')
|
||||
|
|
|
|||
|
|
@ -126,10 +126,12 @@ class RestrictedSectionOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
self.story.setMetadata('authorId',ahref.split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+ahref)
|
||||
self.story.setMetadata('author',a.string)
|
||||
self.story.setMetadata('author',stripHTML(a))
|
||||
|
||||
# title, remove byauthorname.
|
||||
self.story.setMetadata('title',h2.text[:h2.text.index("by"+a.string)])
|
||||
auth=stripHTML(a)
|
||||
title=stripHTML(h2)
|
||||
self.story.setMetadata('title',title[:title.index(" by "+auth)])
|
||||
|
||||
dates = soup.findAll('span', {'class':'date'})
|
||||
if dates: # only for multi-chapter
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('author',aut.string)
|
||||
aut.extract()
|
||||
|
||||
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
|
||||
self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)])
|
||||
|
||||
# Find the chapters:
|
||||
chapters=soup.find('select')
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ class ScarHeadNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('div',{"id":"pagetitle"}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ class SimplyUndeniableComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('h1')
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
@ -155,7 +155,7 @@ class SimplyUndeniableComAdapter(BaseSiteAdapter):
|
|||
for info in asoup.findAll('table', {'cellpadding' : '5'}):
|
||||
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
if a != None:
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
break
|
||||
|
||||
# Find the chapters:
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
|
|||
# formating, so it's a little ugly.
|
||||
|
||||
title = authorSoup.find('a',{'href':'viewstory.php?sid='+self.story.getMetadata('storyId')})
|
||||
self.story.setMetadata('title',title.string)
|
||||
self.story.setMetadata('title',stripHTML(title))
|
||||
titleblock=title.parent.parent
|
||||
|
||||
chapterselect=soup.find('select',{'name':'chapter'})
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('author',aut.string)
|
||||
aut.extract()
|
||||
|
||||
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
|
||||
self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)])
|
||||
|
||||
# Find the chapters:
|
||||
chapters=soup.findAll('div', {'class' : 'content'})
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
|
|||
asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
|
||||
a.find('em').extract()
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find the chapters: chapterview.asp?sid=7000&cid=30919
|
||||
chapters=soup.findAll('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+"&cid=\d+$"))
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -177,7 +177,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
for info in asoup.findAll('td', {'class' : 'highlightcolor1'}):
|
||||
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
if a != None:
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
found = 1
|
||||
break
|
||||
index=index+1
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ class TokraFandomnetComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('h1')
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
chapters=soup.find('ol', {'class' : 'chapters'})
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php"))
|
||||
|
|
|
|||
|
|
@ -144,7 +144,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = pagetitlediv.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitlediv.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
raise e
|
||||
|
||||
# pull title(title) and author from the HTML title.
|
||||
title = soup.find('title').string
|
||||
title = stripHTML(soup.find('title'))
|
||||
logger.debug('Title: %s' % title)
|
||||
title = title.split('::')[1].strip()
|
||||
self.story.setMetadata('title',title.split(' by ')[0].strip())
|
||||
|
|
|
|||
|
|
@ -171,7 +171,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.text)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
alist = pt.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
|
|
|
|||
Loading…
Reference in a new issue