From 01c836f2366e4d0dfa777d1ceb32a63d83dc47eb Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 1 Aug 2018 17:46:17 -0500 Subject: [PATCH] More stripping \xa0 in adapters. --- fanficfare/adapters/adapter_fireflyfansnet.py | 2 +- fanficfare/adapters/adapter_fireflypopulliorg.py | 4 ++-- fanficfare/adapters/adapter_lotrgficcom.py | 2 +- fanficfare/adapters/adapter_wwwarea52hkhnet.py | 2 +- fanficfare/adapters/adapter_wwwutopiastoriescom.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fanficfare/adapters/adapter_fireflyfansnet.py b/fanficfare/adapters/adapter_fireflyfansnet.py index a6abba78..160815d7 100644 --- a/fanficfare/adapters/adapter_fireflyfansnet.py +++ b/fanficfare/adapters/adapter_fireflyfansnet.py @@ -133,7 +133,7 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter): # which is usualy FireFly on this site, but I'm going to get them # anyway.a category = soup.find('span', {'id': 'MainContent_txtItemDetails'}) - category = stripHTML(unicode(category).replace(u"\xc2\xa0", ' ')) + category = stripHTML(unicode(category).replace(u"\xa0", u' ')) metad = category.split(' ') for meta in metad: if ":" in meta: diff --git a/fanficfare/adapters/adapter_fireflypopulliorg.py b/fanficfare/adapters/adapter_fireflypopulliorg.py index a49b8abc..07539ed1 100644 --- a/fanficfare/adapters/adapter_fireflypopulliorg.py +++ b/fanficfare/adapters/adapter_fireflypopulliorg.py @@ -186,7 +186,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter): # There alot of nbsp's (non broken spaces) in here, so I'm going to remove them # I'm also getting rid of the bold tags and the nextline characters to make it # easier to get the information below - story = repr(story).replace(b'\\xa0', '').replace(' ',' ').replace( + story = repr(story).replace(u'\\xa0', '').replace(' ',' ').replace( '','').replace('','').replace(r'\n','') story = self.make_soup(story).find('p') story_a = story.find('a') @@ -319,7 +319,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter): # the end of the section, which has alot of extraneous things, then adding my own div # wrapper, recreating the soup, then getting that div from the soup again, before sending to # the writers. - story = repr(story).replace(b'\\xa0', '').replace(' ',' ').replace(r'\n','').strip() + story = repr(story).replace(u'\\xa0', '').replace(' ',' ').replace(r'\n','').strip() story = story[12:] story = story[:story.find('

Please <')] story = '

' + story + '
' diff --git a/fanficfare/adapters/adapter_lotrgficcom.py b/fanficfare/adapters/adapter_lotrgficcom.py index d1e32d81..5d75e9fc 100644 --- a/fanficfare/adapters/adapter_lotrgficcom.py +++ b/fanficfare/adapters/adapter_lotrgficcom.py @@ -286,7 +286,7 @@ class LOTRgficComAdapter(BaseSiteAdapter): #
#

## we'll have to remove the non-breaking spaces to get this to work. - metad = unicode(metad).replace(u"\xc2\xa0",'').replace('\n','') + metad = unicode(metad).replace(u"\xa0",'').replace('\n','') for txt in metad.split('
'): if 'Challenges:' in txt: txt = txt.replace('Challenges:','').strip() diff --git a/fanficfare/adapters/adapter_wwwarea52hkhnet.py b/fanficfare/adapters/adapter_wwwarea52hkhnet.py index b49503f1..b83d06d4 100644 --- a/fanficfare/adapters/adapter_wwwarea52hkhnet.py +++ b/fanficfare/adapters/adapter_wwwarea52hkhnet.py @@ -191,7 +191,7 @@ class WWWArea52HKHNetAdapter(BaseSiteAdapter): ## I've seen a non-breaking space in some of the storyblocks ## so we are going to remove them. - series = stripHTML(unicode(series.renderContents()).replace(u"\xc2\xa0",'')).strip() + series = stripHTML(unicode(series.renderContents()).replace(u"\xa0",'')).strip() if len(series) > 0: self.story.setMetadata('series',series) diff --git a/fanficfare/adapters/adapter_wwwutopiastoriescom.py b/fanficfare/adapters/adapter_wwwutopiastoriescom.py index 1f7882ef..cad6b565 100644 --- a/fanficfare/adapters/adapter_wwwutopiastoriescom.py +++ b/fanficfare/adapters/adapter_wwwutopiastoriescom.py @@ -147,7 +147,7 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter): for detail in soup.findAll('li'): - det = unicode(detail).replace(u"\xc2\xa0",'') + det = unicode(detail).replace(u"\xa0",'') heading = stripHTML(det).split(' - ')[0] text = stripHTML(det).replace(heading+' - ','') if 'Author' in heading: