Strip HTML from story titles in all the adapters that don't already do so.

This commit is contained in:
Jim Miller 2013-08-21 09:19:58 -05:00
parent b5d00951de
commit 31b7257496
75 changed files with 83 additions and 84 deletions

View file

@ -90,7 +90,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php"))

View file

@ -163,7 +163,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r"^/works/\d+$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
alist = soup.findAll('a', href=re.compile(r"^/users/\w+/pseuds/\w+"))

View file

@ -101,7 +101,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
## Title
a = soup.find('div', {'class':"story border"}).find('span',{'class':'left'})
title=a.text.split('"')[1]
title=stripHTML(a).split('"')[1]
self.story.setMetadata('title',title)
# Find authorid and URL from... author url.

View file

@ -157,7 +157,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None:
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
break

View file

@ -128,7 +128,7 @@ class Asr3SlashzoneOrgAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -207,7 +207,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -180,7 +180,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
pagetitle = soup.find('div',{'id':'pagetitle'})
## Title
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
## Title
pt = soup.find('div', {'id' : 'pagetitle'})
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -143,11 +143,11 @@ class CheckmatedComAdapter(BaseSiteAdapter):
# Now go hunting for all the meta data and the chapter list.
## Title
a = soup.findAll('span', {'class' : 'storytitle'})
self.story.setMetadata('title',a[0].string)
a = soup.find('span', {'class' : 'storytitle'})
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = a[1].find('a', href=re.compile(r"authors.php\?name\=\w+"))
a = a.parent.find('a', href=re.compile(r"authors.php\?name\=\w+"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)

View file

@ -131,7 +131,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -164,7 +164,7 @@ class DokugaComAdapter(BaseSiteAdapter):
aut.extract()
a = a.string[:(len(a.string)-4)]
self.story.setMetadata('title',a)
self.story.setMetadata('title',stripHTML(a))
# Find the chapters:
chapters = soup.find('select').findAll('option')

View file

@ -141,7 +141,7 @@ class DotMoonNetAdapter(BaseSiteAdapter):
## Title
a = body.find('b')
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url. http://www.dotmoon.net/board/index.php?action=profile;u=1'
a = body.find('a', href=re.compile(r"index.php\?action=profile;u=\d+"))

View file

@ -170,7 +170,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -154,7 +154,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -104,7 +104,7 @@ class EfictionEstelielDeAdapter(BaseSiteAdapter):
pagetitle = soup.find('div',{'id':'pagetitle'})
## Title
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -132,7 +132,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
## Title
pt = soup.find('div', {'id' : 'pagetitle'})
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -111,7 +111,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
url=self.url
logger.debug("Normalizing to URL: "+url)
## title's right there...
self.story.setMetadata('title',storya.string)
self.story.setMetadata('title',stripHTML(storya))
data = self._fetchUrl(url)
soup = bs.BeautifulSoup(data)
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
@ -121,7 +121,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
## same way.
chapsoup = bs.BeautifulSoup(chapterdata)
storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
self.story.setMetadata('title',storya.string)
self.story.setMetadata('title',stripHTML(storya))
del chapsoup
del chapterdata

View file

@ -116,7 +116,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# title - first h4 tag will be title.
titleh4 = soup.find('h4')
self.story.setMetadata('title', titleh4.a.string)
self.story.setMetadata('title', stripHTML(titleh4.a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"^/author/\d+"))

View file

@ -117,7 +117,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'^/story/'+self.story.getMetadata('storyId')))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# self.story.setMetadata("title", storyMetadata["title"])
# if not storyMetadata["title"]:

View file

@ -142,7 +142,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"/a/\w+"))

View file

@ -182,7 +182,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -92,7 +92,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'\?psid='+self.story.getMetadata('storyId')))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
## javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid=290995'
if "This story may contain adult themes." in a['href'] and not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url)

View file

@ -101,7 +101,7 @@ class HennethAnnunNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('h2', {'id':'page_heading'})
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find the chapters: chapter_view.cfm?stid=6663&spordinal=1"
for chapter in soup.findAll('a', href=re.compile(r'chapter_view.cfm\?stid='+self.story.getMetadata('storyId')+"&spordinal=\d+$")):

View file

@ -106,7 +106,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
self.story.setMetadata('author',aut.string)
aut.extract()
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)])
# Find the chapters:
chapters=soup.find('select')

View file

@ -105,7 +105,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -183,7 +183,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -170,7 +170,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -109,7 +109,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
## Title
h = soup.find('a', id="blog_title")
t = h.find('span')
self.story.setMetadata('title',t.contents[0].string.strip())
self.story.setMetadata('title',stripHTML(t.contents[0]).strip())
s = t.find('div')
if s != None:
@ -122,9 +122,6 @@ class InDeathNetAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl',a['href'])
self.story.setMetadata('author',m.group('name'))
# Find the chapters:
chapters=soup.findAll('a', title="View entry", href=re.compile(r'http://www.indeath.net/blog/'+self.story.getMetadata('storyId')+"/entry\-(\d+)\-([^/]*)/$"))

View file

@ -129,7 +129,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
## Title
pt = soup.find('div', {'id' : 'pagetitle'})
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -107,10 +107,10 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
for font in title.findAll('font'):
font.extract() # removes 'font' tags from inside the td.
if title.has_key('colspan'):
titlet = title.text
titlet = stripHTML(title)
else:
## No colspan, it's part chapter title--even if it's a one-shot.
titlet = ':'.join(title.text.split(':')[:-1]) # strip trailing 'Chapter X' or chapter title
titlet = ':'.join(stripHTML(title).split(':')[:-1]) # strip trailing 'Chapter X' or chapter title
self.story.setMetadata('title',titlet)
## The story title is difficult to reliably parse from the
## story pages. Getting it from the author page is, but costs

View file

@ -170,7 +170,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -193,7 +193,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -104,7 +104,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('h1')
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"authorresults.php\?author=\d+"))

View file

@ -103,7 +103,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
## Title
a = soup.find('h1')
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"authorresults.php\?author=\d+"))

View file

@ -97,7 +97,7 @@ class NetRaptorOrgAdapter(BaseSiteAdapter):
## Title
pagetitle = soup.find('div',{'id':'pagetitle'})
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -156,7 +156,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -115,7 +115,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
a = info.find('a')
if 'viewstory.php?sid='+self.story.getMetadata('storyId') == a['href'] or \
('viewstory.php?sid='+self.story.getMetadata('storyId')+'&') in a['href']:
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
break

View file

@ -101,7 +101,7 @@ class NickAndGregNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -158,7 +158,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
for info in asoup.findAll('table', {'class' : 'border'}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None:
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
break

View file

@ -158,7 +158,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -136,7 +136,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
## Title
b = soup.find('div', {'id' : 'nav25'})
a = b.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url. /fanfiction/stories.php?psid=125
a = b.find('a', href=re.compile(r"/fanfiction/stories.php\?psid=\d+"))

View file

@ -180,7 +180,7 @@ class PommeDeSangComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile('viewstory.php\?sid=\d+'))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -136,7 +136,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -83,7 +83,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -147,7 +147,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
#find first metadata block
mb = cell.div.findNextSibling('div')
#Get meta...
self.story.setMetadata('title', mb.b.string)
self.story.setMetadata('title', stripHTML(mb.b))
#strip out brackets on rating
self.story.setMetadata('rating', mb.span.string[1:-1])
#Completion status is denoted by the presence of this image:

View file

@ -181,7 +181,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
## Title
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -134,7 +134,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -130,7 +130,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -138,7 +138,7 @@ class QafFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',aut.string)
aut.extract()
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)])
# Find the chapters:
chapters=soup.find('select')

View file

@ -126,10 +126,12 @@ class RestrictedSectionOrgSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('authorId',ahref.split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+ahref)
self.story.setMetadata('author',a.string)
self.story.setMetadata('author',stripHTML(a))
# title, remove byauthorname.
self.story.setMetadata('title',h2.text[:h2.text.index("by"+a.string)])
auth=stripHTML(a)
title=stripHTML(h2)
self.story.setMetadata('title',title[:title.index(" by "+auth)])
dates = soup.findAll('span', {'class':'date'})
if dates: # only for multi-chapter

View file

@ -110,7 +110,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
self.story.setMetadata('author',aut.string)
aut.extract()
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)])
# Find the chapters:
chapters=soup.find('select')

View file

@ -181,7 +181,7 @@ class ScarHeadNetAdapter(BaseSiteAdapter):
## Title
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -139,7 +139,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('div',{"id":"pagetitle"}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -140,7 +140,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -143,7 +143,7 @@ class SimplyUndeniableComAdapter(BaseSiteAdapter):
## Title
a = soup.find('h1')
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
@ -155,7 +155,7 @@ class SimplyUndeniableComAdapter(BaseSiteAdapter):
for info in asoup.findAll('table', {'cellpadding' : '5'}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None:
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
break
# Find the chapters:

View file

@ -130,7 +130,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -122,7 +122,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# formating, so it's a little ugly.
title = authorSoup.find('a',{'href':'viewstory.php?sid='+self.story.getMetadata('storyId')})
self.story.setMetadata('title',title.string)
self.story.setMetadata('title',stripHTML(title))
titleblock=title.parent.parent
chapterselect=soup.find('select',{'name':'chapter'})

View file

@ -106,7 +106,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('author',aut.string)
aut.extract()
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)])
# Find the chapters:
chapters=soup.findAll('div', {'class' : 'content'})

View file

@ -104,7 +104,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl')))
a.find('em').extract()
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find the chapters: chapterview.asp?sid=7000&cid=30919
chapters=soup.findAll('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+"&cid=\d+$"))

View file

@ -133,7 +133,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -99,7 +99,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -179,7 +179,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -177,7 +177,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -165,7 +165,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
for info in asoup.findAll('td', {'class' : 'highlightcolor1'}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None:
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
found = 1
break
index=index+1

View file

@ -165,7 +165,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -131,7 +131,7 @@ class TokraFandomnetComAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -100,7 +100,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
## Title
a = soup.find('h1')
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find the chapters:
chapters=soup.find('ol', {'class' : 'chapters'})

View file

@ -130,7 +130,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php"))

View file

@ -144,7 +144,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
## Title
a = pagetitlediv.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pagetitlediv.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -115,7 +115,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -70,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
raise e
# pull title(title) and author from the HTML title.
title = soup.find('title').string
title = stripHTML(soup.find('title'))
logger.debug('Title: %s' % title)
title = title.split('::')[1].strip()
self.story.setMetadata('title',title.split(' by ')[0].strip())

View file

@ -171,7 +171,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
## Title
pt = soup.find('div', {'id' : 'pagetitle'})
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -101,7 +101,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
## Title
pt = soup.find('div', {'id' : 'pagetitle'})
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.text)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))

View file

@ -113,7 +113,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
## Title
pt = soup.find('div', {'id' : 'pagetitle'})
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
alist = pt.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))