diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py index 9c712846..f51364a3 100644 --- a/fanficdownloader/adapters/adapter_adastrafanficcom.py +++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py @@ -90,7 +90,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php")) diff --git a/fanficdownloader/adapters/adapter_archiveofourownorg.py b/fanficdownloader/adapters/adapter_archiveofourownorg.py index ac54f2e2..6aa4ad02 100644 --- a/fanficdownloader/adapters/adapter_archiveofourownorg.py +++ b/fanficdownloader/adapters/adapter_archiveofourownorg.py @@ -163,7 +163,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r"^/works/\d+$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. alist = soup.findAll('a', href=re.compile(r"^/users/\w+/pseuds/\w+")) diff --git a/fanficdownloader/adapters/adapter_archiveskyehawkecom.py b/fanficdownloader/adapters/adapter_archiveskyehawkecom.py index 0cbfcbd9..6e853380 100644 --- a/fanficdownloader/adapters/adapter_archiveskyehawkecom.py +++ b/fanficdownloader/adapters/adapter_archiveskyehawkecom.py @@ -101,7 +101,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): ## Title a = soup.find('div', {'class':"story border"}).find('span',{'class':'left'}) - title=a.text.split('"')[1] + title=stripHTML(a).split('"')[1] self.story.setMetadata('title',title) # Find authorid and URL from... author url. 
diff --git a/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py b/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py index 9af3bbfc..9faf2bfd 100644 --- a/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py @@ -157,7 +157,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter): for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}): a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) if a != None: - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) break diff --git a/fanficdownloader/adapters/adapter_asr3slashzoneorg.py b/fanficdownloader/adapters/adapter_asr3slashzoneorg.py index 56fa9c5a..7d0c2a90 100644 --- a/fanficdownloader/adapters/adapter_asr3slashzoneorg.py +++ b/fanficdownloader/adapters/adapter_asr3slashzoneorg.py @@ -128,7 +128,7 @@ class Asr3SlashzoneOrgAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_bloodtiesfancom.py b/fanficdownloader/adapters/adapter_bloodtiesfancom.py index c9f0eb17..2e5748eb 100644 --- a/fanficdownloader/adapters/adapter_bloodtiesfancom.py +++ b/fanficdownloader/adapters/adapter_bloodtiesfancom.py @@ -207,7 +207,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_castlefansorg.py b/fanficdownloader/adapters/adapter_castlefansorg.py index 60f8607d..f1fc29d6 100644 --- a/fanficdownloader/adapters/adapter_castlefansorg.py +++ b/fanficdownloader/adapters/adapter_castlefansorg.py @@ -180,7 +180,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX pagetitle = soup.find('div',{'id':'pagetitle'}) ## Title a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_chaossycophanthexcom.py b/fanficdownloader/adapters/adapter_chaossycophanthexcom.py index 8160b1e2..3c1853ec 100644 --- a/fanficdownloader/adapters/adapter_chaossycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_chaossycophanthexcom.py @@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_checkmatedcom.py b/fanficdownloader/adapters/adapter_checkmatedcom.py index 72be1f49..b1ca1254 100644 --- a/fanficdownloader/adapters/adapter_checkmatedcom.py +++ b/fanficdownloader/adapters/adapter_checkmatedcom.py @@ -143,11 +143,11 @@ class CheckmatedComAdapter(BaseSiteAdapter): # Now go hunting for all the meta data and the chapter list. 
## Title - a = soup.findAll('span', {'class' : 'storytitle'}) - self.story.setMetadata('title',a[0].string) + a = soup.find('span', {'class' : 'storytitle'}) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. - a = a[1].find('a', href=re.compile(r"authors.php\?name\=\w+")) + a = a.parent.find('a', href=re.compile(r"authors.php\?name\=\w+")) self.story.setMetadata('authorId',a['href'].split('=')[1]) self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) self.story.setMetadata('author',a.string) diff --git a/fanficdownloader/adapters/adapter_destinysgatewaycom.py b/fanficdownloader/adapters/adapter_destinysgatewaycom.py index 61b300aa..adf41d2c 100644 --- a/fanficdownloader/adapters/adapter_destinysgatewaycom.py +++ b/fanficdownloader/adapters/adapter_destinysgatewaycom.py @@ -131,7 +131,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_dokugacom.py b/fanficdownloader/adapters/adapter_dokugacom.py index 826b7c97..6a21e800 100644 --- a/fanficdownloader/adapters/adapter_dokugacom.py +++ b/fanficdownloader/adapters/adapter_dokugacom.py @@ -164,7 +164,7 @@ class DokugaComAdapter(BaseSiteAdapter): aut.extract() a = a.string[:(len(a.string)-4)] - self.story.setMetadata('title',a) + self.story.setMetadata('title',stripHTML(a)) # Find the chapters: chapters = soup.find('select').findAll('option') diff --git a/fanficdownloader/adapters/adapter_dotmoonnet.py b/fanficdownloader/adapters/adapter_dotmoonnet.py index af6cfa37..e2dedb7d 100644 --- a/fanficdownloader/adapters/adapter_dotmoonnet.py +++ b/fanficdownloader/adapters/adapter_dotmoonnet.py @@ -141,7 +141,7 @@ class DotMoonNetAdapter(BaseSiteAdapter): ## Title a = body.find('b') - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. http://www.dotmoon.net/board/index.php?action=profile;u=1' a = body.find('a', href=re.compile(r"index.php\?action=profile;u=\d+")) diff --git a/fanficdownloader/adapters/adapter_dracoandginnycom.py b/fanficdownloader/adapters/adapter_dracoandginnycom.py index c1b5b5db..f666249b 100644 --- a/fanficdownloader/adapters/adapter_dracoandginnycom.py +++ b/fanficdownloader/adapters/adapter_dracoandginnycom.py @@ -170,7 +170,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_dramioneorg.py b/fanficdownloader/adapters/adapter_dramioneorg.py index f3b4a46b..6dd0a391 100644 --- a/fanficdownloader/adapters/adapter_dramioneorg.py +++ b/fanficdownloader/adapters/adapter_dramioneorg.py @@ -154,7 +154,7 @@ class DramioneOrgAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_efictionestelielde.py b/fanficdownloader/adapters/adapter_efictionestelielde.py index 80362786..4343a2b6 100644 --- a/fanficdownloader/adapters/adapter_efictionestelielde.py +++ b/fanficdownloader/adapters/adapter_efictionestelielde.py @@ -104,7 +104,7 @@ class EfictionEstelielDeAdapter(BaseSiteAdapter): pagetitle = soup.find('div',{'id':'pagetitle'}) ## Title a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py b/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py index 4602ae7b..6835212c 100644 --- a/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py @@ -132,7 +132,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_fictionalleyorg.py b/fanficdownloader/adapters/adapter_fictionalleyorg.py index 67787aab..b9fb4494 100644 --- a/fanficdownloader/adapters/adapter_fictionalleyorg.py +++ b/fanficdownloader/adapters/adapter_fictionalleyorg.py @@ -111,7 +111,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): url=self.url logger.debug("Normalizing to URL: "+url) ## title's right there... - self.story.setMetadata('title',storya.string) + self.story.setMetadata('title',stripHTML(storya)) data = self._fetchUrl(url) soup = bs.BeautifulSoup(data) chapterlinklist = soup.findAll('a',{'class':'chapterlink'}) @@ -121,7 +121,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): ## same way. 
chapsoup = bs.BeautifulSoup(chapterdata) storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1] - self.story.setMetadata('title',storya.string) + self.story.setMetadata('title',stripHTML(storya)) del chapsoup del chapterdata diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py index 7c16fd7c..4446df73 100644 --- a/fanficdownloader/adapters/adapter_ficwadcom.py +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -116,7 +116,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter): # title - first h4 tag will be title. titleh4 = soup.find('h4') - self.story.setMetadata('title', titleh4.a.string) + self.story.setMetadata('title', stripHTML(titleh4.a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"^/author/\d+")) diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py index d3379500..b5cd487a 100644 --- a/fanficdownloader/adapters/adapter_fimfictionnet.py +++ b/fanficdownloader/adapters/adapter_fimfictionnet.py @@ -117,7 +117,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'^/story/'+self.story.getMetadata('storyId'))) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # self.story.setMetadata("title", storyMetadata["title"]) # if not storyMetadata["title"]: diff --git a/fanficdownloader/adapters/adapter_finestoriescom.py b/fanficdownloader/adapters/adapter_finestoriescom.py index 2bb42394..c5b626b0 100644 --- a/fanficdownloader/adapters/adapter_finestoriescom.py +++ b/fanficdownloader/adapters/adapter_finestoriescom.py @@ -142,7 +142,7 @@ class FineStoriesComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"/a/\w+")) diff --git a/fanficdownloader/adapters/adapter_grangerenchantedcom.py b/fanficdownloader/adapters/adapter_grangerenchantedcom.py index cb9caf71..a39f738e 100644 --- a/fanficdownloader/adapters/adapter_grangerenchantedcom.py +++ b/fanficdownloader/adapters/adapter_grangerenchantedcom.py @@ -182,7 +182,7 @@ class GrangerEnchantedCom(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py index 7a012dc3..2fc64dc3 100644 --- a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py @@ -92,7 +92,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'\?psid='+self.story.getMetadata('storyId'))) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) ## javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid=290995' if "This story may contain adult themes." 
in a['href'] and not (self.is_adult or self.getConfig("is_adult")): raise exceptions.AdultCheckRequired(self.url) diff --git a/fanficdownloader/adapters/adapter_hennethannunnet.py b/fanficdownloader/adapters/adapter_hennethannunnet.py index 015fdcf2..340e8c2a 100644 --- a/fanficdownloader/adapters/adapter_hennethannunnet.py +++ b/fanficdownloader/adapters/adapter_hennethannunnet.py @@ -101,7 +101,7 @@ class HennethAnnunNetAdapter(BaseSiteAdapter): ## Title a = soup.find('h2', {'id':'page_heading'}) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find the chapters: chapter_view.cfm?stid=6663&spordinal=1" for chapter in soup.findAll('a', href=re.compile(r'chapter_view.cfm\?stid='+self.story.getMetadata('storyId')+"&spordinal=\d+$")): diff --git a/fanficdownloader/adapters/adapter_hlfictionnet.py b/fanficdownloader/adapters/adapter_hlfictionnet.py index c7edf4be..5d15f7db 100644 --- a/fanficdownloader/adapters/adapter_hlfictionnet.py +++ b/fanficdownloader/adapters/adapter_hlfictionnet.py @@ -106,7 +106,7 @@ class HLFictionNetAdapter(BaseSiteAdapter): self.story.setMetadata('author',aut.string) aut.extract() - self.story.setMetadata('title',a.string[:(len(a.string)-3)]) + self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)]) # Find the chapters: chapters=soup.find('select') diff --git a/fanficdownloader/adapters/adapter_hpfanficarchivecom.py b/fanficdownloader/adapters/adapter_hpfanficarchivecom.py index a7b8fee2..8a57e359 100644 --- a/fanficdownloader/adapters/adapter_hpfanficarchivecom.py +++ b/fanficdownloader/adapters/adapter_hpfanficarchivecom.py @@ -105,7 +105,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_iketernalnet.py b/fanficdownloader/adapters/adapter_iketernalnet.py index dd2aa1bc..a89ff8f4 100644 --- a/fanficdownloader/adapters/adapter_iketernalnet.py +++ b/fanficdownloader/adapters/adapter_iketernalnet.py @@ -183,7 +183,7 @@ class IkEternalNetAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_imagineeficcom.py b/fanficdownloader/adapters/adapter_imagineeficcom.py index b524fddd..e080befb 100644 --- a/fanficdownloader/adapters/adapter_imagineeficcom.py +++ b/fanficdownloader/adapters/adapter_imagineeficcom.py @@ -170,7 +170,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_indeathnet.py b/fanficdownloader/adapters/adapter_indeathnet.py index 4988bef0..4d6fd93d 100644 --- a/fanficdownloader/adapters/adapter_indeathnet.py +++ b/fanficdownloader/adapters/adapter_indeathnet.py @@ -109,7 +109,7 @@ class InDeathNetAdapter(BaseSiteAdapter): ## Title h = soup.find('a', id="blog_title") t = h.find('span') - self.story.setMetadata('title',t.contents[0].string.strip()) + self.story.setMetadata('title',stripHTML(t.contents[0]).strip()) s = t.find('div') if s != None: @@ -122,9 +122,6 @@ class InDeathNetAdapter(BaseSiteAdapter): self.story.setMetadata('authorUrl',a['href']) self.story.setMetadata('author',m.group('name')) - - - # Find the chapters: chapters=soup.findAll('a', title="View entry", href=re.compile(r'http://www.indeath.net/blog/'+self.story.getMetadata('storyId')+"/entry\-(\d+)\-([^/]*)/$")) diff --git a/fanficdownloader/adapters/adapter_libraryofmoriacom.py b/fanficdownloader/adapters/adapter_libraryofmoriacom.py index 57083b2a..6f8ff7fe 100644 --- a/fanficdownloader/adapters/adapter_libraryofmoriacom.py +++ b/fanficdownloader/adapters/adapter_libraryofmoriacom.py @@ -129,7 +129,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py index 07c416b5..93b8b761 100644 --- a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py @@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_mediaminerorg.py b/fanficdownloader/adapters/adapter_mediaminerorg.py index 93d7abe7..b9c46414 100644 --- a/fanficdownloader/adapters/adapter_mediaminerorg.py +++ b/fanficdownloader/adapters/adapter_mediaminerorg.py @@ -107,10 +107,10 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): for font in title.findAll('font'): font.extract() # removes 'font' tags from inside the td. if title.has_key('colspan'): - titlet = title.text + titlet = stripHTML(title) else: ## No colspan, it's part chapter title--even if it's a one-shot. - titlet = ':'.join(title.text.split(':')[:-1]) # strip trailing 'Chapter X' or chapter title + titlet = ':'.join(stripHTML(title).split(':')[:-1]) # strip trailing 'Chapter X' or chapter title self.story.setMetadata('title',titlet) ## The story title is difficult to reliably parse from the ## story pages. 
Getting it from the author page is, but costs diff --git a/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py b/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py index 480e4548..91b8d6e5 100644 --- a/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py +++ b/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py @@ -170,7 +170,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_mugglenetcom.py b/fanficdownloader/adapters/adapter_mugglenetcom.py index d3bf5a25..31599fbd 100644 --- a/fanficdownloader/adapters/adapter_mugglenetcom.py +++ b/fanficdownloader/adapters/adapter_mugglenetcom.py @@ -193,7 +193,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_nationallibrarynet.py b/fanficdownloader/adapters/adapter_nationallibrarynet.py index c4ad08dc..7e1947db 100644 --- a/fanficdownloader/adapters/adapter_nationallibrarynet.py +++ b/fanficdownloader/adapters/adapter_nationallibrarynet.py @@ -104,7 +104,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter): ## Title a = soup.find('h1') - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"authorresults.php\?author=\d+")) diff --git a/fanficdownloader/adapters/adapter_ncisficcom.py b/fanficdownloader/adapters/adapter_ncisficcom.py index 85d7a6e3..4e6a4482 100644 --- a/fanficdownloader/adapters/adapter_ncisficcom.py +++ b/fanficdownloader/adapters/adapter_ncisficcom.py @@ -103,7 +103,7 @@ class NCISFicComAdapter(BaseSiteAdapter): ## Title a = soup.find('h1') - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"authorresults.php\?author=\d+")) diff --git a/fanficdownloader/adapters/adapter_netraptororg.py b/fanficdownloader/adapters/adapter_netraptororg.py index 86082f4c..524d8070 100644 --- a/fanficdownloader/adapters/adapter_netraptororg.py +++ b/fanficdownloader/adapters/adapter_netraptororg.py @@ -97,7 +97,7 @@ class NetRaptorOrgAdapter(BaseSiteAdapter): ## Title pagetitle = soup.find('div',{'id':'pagetitle'}) a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_nfacommunitycom.py b/fanficdownloader/adapters/adapter_nfacommunitycom.py index 01cdb42e..98aa2155 100644 --- a/fanficdownloader/adapters/adapter_nfacommunitycom.py +++ b/fanficdownloader/adapters/adapter_nfacommunitycom.py @@ -156,7 +156,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_nhamagicalworldsus.py b/fanficdownloader/adapters/adapter_nhamagicalworldsus.py index f8574f91..91fc965f 100644 --- a/fanficdownloader/adapters/adapter_nhamagicalworldsus.py +++ b/fanficdownloader/adapters/adapter_nhamagicalworldsus.py @@ -115,7 +115,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter): a = info.find('a') if 'viewstory.php?sid='+self.story.getMetadata('storyId') == a['href'] or \ ('viewstory.php?sid='+self.story.getMetadata('storyId')+'&') in a['href']: - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) break diff --git a/fanficdownloader/adapters/adapter_nickandgregnet.py b/fanficdownloader/adapters/adapter_nickandgregnet.py index 9ece5a85..9a19c11d 100644 --- a/fanficdownloader/adapters/adapter_nickandgregnet.py +++ b/fanficdownloader/adapters/adapter_nickandgregnet.py @@ -101,7 +101,7 @@ class NickAndGregNetAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py index 2dc80b2c..475c87ca 100644 --- a/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py @@ -158,7 +158,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter): for info in asoup.findAll('table', {'class' : 'border'}): a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) if a != None: - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) break diff --git a/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py b/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py index 596244bd..dc679689 100644 --- a/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py @@ -158,7 +158,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_phoenixsongnet.py b/fanficdownloader/adapters/adapter_phoenixsongnet.py index 9de6bb8b..f1da893a 100644 --- a/fanficdownloader/adapters/adapter_phoenixsongnet.py +++ b/fanficdownloader/adapters/adapter_phoenixsongnet.py @@ -136,7 +136,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): ## Title b = soup.find('div', {'id' : 'nav25'}) a = b.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. /fanfiction/stories.php?psid=125 a = b.find('a', href=re.compile(r"/fanfiction/stories.php\?psid=\d+")) diff --git a/fanficdownloader/adapters/adapter_pommedesangcom.py b/fanficdownloader/adapters/adapter_pommedesangcom.py index 15edf9eb..ffa7f8fa 100644 --- a/fanficdownloader/adapters/adapter_pommedesangcom.py +++ b/fanficdownloader/adapters/adapter_pommedesangcom.py @@ -180,7 +180,7 @@ class PommeDeSangComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile('viewstory.php\?sid=\d+')) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py b/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py index 9a90aa4d..b08b542f 100644 --- a/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py +++ b/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py @@ -136,7 +136,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py index 4427e9bf..58624e6a 100644 --- a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -83,7 +83,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_potterficscom.py b/fanficdownloader/adapters/adapter_potterficscom.py index 730a8986..84b9d229 100644 --- a/fanficdownloader/adapters/adapter_potterficscom.py +++ b/fanficdownloader/adapters/adapter_potterficscom.py @@ -147,7 +147,7 @@ class PotterFicsComAdapter(BaseSiteAdapter): #find first metadata block mb = cell.div.findNextSibling('div') #Get meta... - self.story.setMetadata('title', mb.b.string) + self.story.setMetadata('title', stripHTML(mb.b)) #strip out brackets on rating self.story.setMetadata('rating', mb.span.string[1:-1]) #Completion status is denoted by the presence of this image: diff --git a/fanficdownloader/adapters/adapter_potterheadsanonymouscom.py b/fanficdownloader/adapters/adapter_potterheadsanonymouscom.py index d3874748..7fc36d80 100644 --- a/fanficdownloader/adapters/adapter_potterheadsanonymouscom.py +++ b/fanficdownloader/adapters/adapter_potterheadsanonymouscom.py @@ -181,7 +181,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter): ## Title a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... 
author url. a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_pretendercentrecom.py b/fanficdownloader/adapters/adapter_pretendercentrecom.py index ef65b71d..4097eccc 100644 --- a/fanficdownloader/adapters/adapter_pretendercentrecom.py +++ b/fanficdownloader/adapters/adapter_pretendercentrecom.py @@ -134,7 +134,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_psychficcom.py b/fanficdownloader/adapters/adapter_psychficcom.py index 33f53d8f..bb69eea6 100644 --- a/fanficdownloader/adapters/adapter_psychficcom.py +++ b/fanficdownloader/adapters/adapter_psychficcom.py @@ -130,7 +130,7 @@ class PsychFicComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_qafficcom.py b/fanficdownloader/adapters/adapter_qafficcom.py index f5573519..ce95780b 100644 --- a/fanficdownloader/adapters/adapter_qafficcom.py +++ b/fanficdownloader/adapters/adapter_qafficcom.py @@ -138,7 +138,7 @@ class QafFicComAdapter(BaseSiteAdapter): self.story.setMetadata('author',aut.string) aut.extract() - self.story.setMetadata('title',a.string[:(len(a.string)-3)]) + self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)]) # Find the chapters: chapters=soup.find('select') diff --git a/fanficdownloader/adapters/adapter_restrictedsectionorg.py b/fanficdownloader/adapters/adapter_restrictedsectionorg.py index ce02ca08..686e053c 100644 --- a/fanficdownloader/adapters/adapter_restrictedsectionorg.py +++ b/fanficdownloader/adapters/adapter_restrictedsectionorg.py @@ -126,10 +126,12 @@ class RestrictedSectionOrgSiteAdapter(BaseSiteAdapter): self.story.setMetadata('authorId',ahref.split('=')[1]) self.story.setMetadata('authorUrl','http://'+self.host+'/'+ahref) - self.story.setMetadata('author',a.string) + self.story.setMetadata('author',stripHTML(a)) # title, remove byauthorname. 
- self.story.setMetadata('title',h2.text[:h2.text.index("by"+a.string)]) + auth=stripHTML(a) + title=stripHTML(h2) + self.story.setMetadata('title',title[:title.index(" by "+auth)]) dates = soup.findAll('span', {'class':'date'}) if dates: # only for multi-chapter diff --git a/fanficdownloader/adapters/adapter_samdeanarchivenu.py b/fanficdownloader/adapters/adapter_samdeanarchivenu.py index c0549a56..3c2bf275 100644 --- a/fanficdownloader/adapters/adapter_samdeanarchivenu.py +++ b/fanficdownloader/adapters/adapter_samdeanarchivenu.py @@ -110,7 +110,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter): self.story.setMetadata('author',aut.string) aut.extract() - self.story.setMetadata('title',a.string[:(len(a.string)-3)]) + self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)]) # Find the chapters: chapters=soup.find('select') diff --git a/fanficdownloader/adapters/adapter_scarheadnet.py b/fanficdownloader/adapters/adapter_scarheadnet.py index 736c047e..593abc68 100644 --- a/fanficdownloader/adapters/adapter_scarheadnet.py +++ b/fanficdownloader/adapters/adapter_scarheadnet.py @@ -181,7 +181,7 @@ class ScarHeadNetAdapter(BaseSiteAdapter): ## Title a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py b/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py index 72675993..3a46bb7d 100644 --- a/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py +++ b/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py @@ -139,7 +139,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('div',{"id":"pagetitle"}).find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_sg1heliopoliscom.py b/fanficdownloader/adapters/adapter_sg1heliopoliscom.py index b8a9e229..aba87c0f 100644 --- a/fanficdownloader/adapters/adapter_sg1heliopoliscom.py +++ b/fanficdownloader/adapters/adapter_sg1heliopoliscom.py @@ -140,7 +140,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_simplyundeniablecom.py b/fanficdownloader/adapters/adapter_simplyundeniablecom.py index 990e372a..2277ac36 100644 --- a/fanficdownloader/adapters/adapter_simplyundeniablecom.py +++ b/fanficdownloader/adapters/adapter_simplyundeniablecom.py @@ -143,7 +143,7 @@ class SimplyUndeniableComAdapter(BaseSiteAdapter): ## Title a = soup.find('h1') - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) @@ -155,7 +155,7 @@ class SimplyUndeniableComAdapter(BaseSiteAdapter): for info in asoup.findAll('table', {'cellpadding' : '5'}): a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) if a != None: - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) break # Find the chapters: diff --git a/fanficdownloader/adapters/adapter_sinfuldesireorg.py b/fanficdownloader/adapters/adapter_sinfuldesireorg.py index d6a96a73..c1e98206 100644 --- a/fanficdownloader/adapters/adapter_sinfuldesireorg.py +++ b/fanficdownloader/adapters/adapter_sinfuldesireorg.py @@ -130,7 +130,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_squidgeorgpeja.py b/fanficdownloader/adapters/adapter_squidgeorgpeja.py index ba2d3e64..b79bb6c5 100644 --- a/fanficdownloader/adapters/adapter_squidgeorgpeja.py +++ b/fanficdownloader/adapters/adapter_squidgeorgpeja.py @@ -122,7 +122,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter): # formating, so it's a little ugly. 
title = authorSoup.find('a',{'href':'viewstory.php?sid='+self.story.getMetadata('storyId')}) - self.story.setMetadata('title',title.string) + self.story.setMetadata('title',stripHTML(title)) titleblock=title.parent.parent chapterselect=soup.find('select',{'name':'chapter'}) diff --git a/fanficdownloader/adapters/adapter_stargateatlantisorg.py b/fanficdownloader/adapters/adapter_stargateatlantisorg.py index 612f9951..9fd3d0a2 100644 --- a/fanficdownloader/adapters/adapter_stargateatlantisorg.py +++ b/fanficdownloader/adapters/adapter_stargateatlantisorg.py @@ -106,7 +106,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter): self.story.setMetadata('author',aut.string) aut.extract() - self.story.setMetadata('title',a.string[:(len(a.string)-3)]) + self.story.setMetadata('title',stripHTML(a)[:(len(stripHTML(a))-3)]) # Find the chapters: chapters=soup.findAll('div', {'class' : 'content'}) diff --git a/fanficdownloader/adapters/adapter_storiesofardacom.py b/fanficdownloader/adapters/adapter_storiesofardacom.py index acdddda9..0dfc4a88 100644 --- a/fanficdownloader/adapters/adapter_storiesofardacom.py +++ b/fanficdownloader/adapters/adapter_storiesofardacom.py @@ -104,7 +104,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter): asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) a.find('em').extract() - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find the chapters: chapterview.asp?sid=7000&cid=30919 chapters=soup.findAll('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+"&cid=\d+$")) diff --git a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py index f724d395..90c37146 100644 --- a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py +++ b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py @@ -133,7 +133,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): ## Title a = soup.find('a', 
href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId'))) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_thealphagatecom.py b/fanficdownloader/adapters/adapter_thealphagatecom.py index fa28e402..ce5abec0 100644 --- a/fanficdownloader/adapters/adapter_thealphagatecom.py +++ b/fanficdownloader/adapters/adapter_thealphagatecom.py @@ -99,7 +99,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_thehookupzonenet.py b/fanficdownloader/adapters/adapter_thehookupzonenet.py index 618c21e6..22636b8c 100644 --- a/fanficdownloader/adapters/adapter_thehookupzonenet.py +++ b/fanficdownloader/adapters/adapter_thehookupzonenet.py @@ -179,7 +179,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_themasquenet.py b/fanficdownloader/adapters/adapter_themasquenet.py index 2be75ff7..e2e80ffb 100644 --- a/fanficdownloader/adapters/adapter_themasquenet.py +++ b/fanficdownloader/adapters/adapter_themasquenet.py @@ -177,7 +177,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py b/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py index 159a9467..e0a8c4fb 100644 --- a/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py +++ b/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py @@ -165,7 +165,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter): for info in asoup.findAll('td', {'class' : 'highlightcolor1'}): a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) if a != None: - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) found = 1 break index=index+1 diff --git a/fanficdownloader/adapters/adapter_thequidditchpitchorg.py b/fanficdownloader/adapters/adapter_thequidditchpitchorg.py index bdf26d2d..98bc028e 100644 --- a/fanficdownloader/adapters/adapter_thequidditchpitchorg.py +++ b/fanficdownloader/adapters/adapter_thequidditchpitchorg.py @@ -165,7 +165,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId'))) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_tokrafandomnetcom.py b/fanficdownloader/adapters/adapter_tokrafandomnetcom.py index 3576f97b..70a1d90d 100644 --- a/fanficdownloader/adapters/adapter_tokrafandomnetcom.py +++ b/fanficdownloader/adapters/adapter_tokrafandomnetcom.py @@ -131,7 +131,7 @@ class TokraFandomnetComAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_twilightarchivescom.py b/fanficdownloader/adapters/adapter_twilightarchivescom.py index 99a5c7e6..7995456b 100644 --- a/fanficdownloader/adapters/adapter_twilightarchivescom.py +++ b/fanficdownloader/adapters/adapter_twilightarchivescom.py @@ -100,7 +100,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter): ## Title a = soup.find('h1') - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find the chapters: chapters=soup.find('ol', {'class' : 'chapters'}) diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index c45b3e90..f67e985c 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -130,7 +130,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php")) diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py index 38ce5806..d88d9320 100644 --- a/fanficdownloader/adapters/adapter_twiwritenet.py +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -144,7 +144,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter): ## Title a = pagetitlediv.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = pagetitlediv.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_walkingtheplankorg.py b/fanficdownloader/adapters/adapter_walkingtheplankorg.py index 890fc64f..11f4fc42 100644 --- a/fanficdownloader/adapters/adapter_walkingtheplankorg.py +++ b/fanficdownloader/adapters/adapter_walkingtheplankorg.py @@ -115,7 +115,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter): ## Title a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py index 4030bc58..e43f8d53 100644 --- a/fanficdownloader/adapters/adapter_whoficcom.py +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -70,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter): raise e # pull title(title) and author from the HTML title. 
- title = soup.find('title').string + title = stripHTML(soup.find('title')) logger.debug('Title: %s' % title) title = title.split('::')[1].strip() self.story.setMetadata('title',title.split(' by ')[0].strip()) diff --git a/fanficdownloader/adapters/adapter_wizardtalesnet.py b/fanficdownloader/adapters/adapter_wizardtalesnet.py index 3054ea66..3f60560e 100644 --- a/fanficdownloader/adapters/adapter_wizardtalesnet.py +++ b/fanficdownloader/adapters/adapter_wizardtalesnet.py @@ -171,7 +171,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_wolverineandroguecom.py b/fanficdownloader/adapters/adapter_wolverineandroguecom.py index cf16068d..4bdfb913 100644 --- a/fanficdownloader/adapters/adapter_wolverineandroguecom.py +++ b/fanficdownloader/adapters/adapter_wolverineandroguecom.py @@ -101,7 +101,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.text) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. 
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_wraithbaitcom.py b/fanficdownloader/adapters/adapter_wraithbaitcom.py index 0b17a2ec..ab31c9f5 100644 --- a/fanficdownloader/adapters/adapter_wraithbaitcom.py +++ b/fanficdownloader/adapters/adapter_wraithbaitcom.py @@ -113,7 +113,7 @@ class WraithBaitComAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',stripHTML(a)) # Find authorid and URL from... author url. alist = pt.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))