mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-23 01:04:44 +01:00
Fixes for encoding/make unicode issues.
This commit is contained in:
parent
61c3af67e1
commit
d43b90642f
5 changed files with 5 additions and 10 deletions
|
|
@ -30,7 +30,6 @@ from .. import exceptions as exceptions
|
|||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six import ensure_text
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
|
@ -338,7 +337,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
|
||||
##They are always in the same order.
|
||||
## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
|
||||
liMetadata = ensure_text(lc2).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ')
|
||||
liMetadata = unicode(lc2).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ')
|
||||
liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
|
||||
liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
|
||||
for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ import re
|
|||
import sys
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six import ensure_text
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
|
@ -134,7 +133,7 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
|||
# which is usually FireFly on this site, but I'm going to get them
|
||||
# anyway.
|
||||
category = soup.find('span', {'id': 'MainContent_txtItemDetails'})
|
||||
category = stripHTML(ensure_text(category).replace(b"\xc2\xa0", ' '))
|
||||
category = stripHTML(unicode(category).replace(u"\xc2\xa0", ' '))
|
||||
metad = category.split(' ')
|
||||
for meta in metad:
|
||||
if ":" in meta:
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ from .. import exceptions as exceptions
|
|||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six import ensure_text
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
|
@ -287,7 +286,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
#<br/>
|
||||
#</p>
|
||||
## we'll have to remove the non-breaking spaces to get this to work.
|
||||
metad = ensure_text(metad).replace(b"\xc2\xa0",'').replace('\n','')
|
||||
metad = unicode(metad).replace(u"\xc2\xa0",'').replace('\n','')
|
||||
for txt in metad.split('<br/>'):
|
||||
if 'Challenges:' in txt:
|
||||
txt = txt.replace('Challenges:','').strip()
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ from .. import exceptions as exceptions
|
|||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six import ensure_text
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
|
@ -192,7 +191,7 @@ class WWWArea52HKHNetAdapter(BaseSiteAdapter):
|
|||
|
||||
## I've seen a non-breaking space in some of the storyblocks
|
||||
## so we are going to remove them.
|
||||
series = stripHTML(ensure_text(series.renderContents()).replace(b"\xc2\xa0",'')).strip()
|
||||
series = stripHTML(unicode(series.renderContents()).replace(u"\xc2\xa0",'')).strip()
|
||||
if len(series) > 0:
|
||||
self.story.setMetadata('series',series)
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ from .. import exceptions as exceptions
|
|||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six import ensure_text
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
from ..six.moves.urllib.parse import quote
|
||||
|
||||
|
|
@ -148,7 +147,7 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
for detail in soup.findAll('li'):
|
||||
det = ensure_text(detail).replace(b"\xc2\xa0",'')
|
||||
det = unicode(detail).replace(u"\xc2\xa0",'')
|
||||
heading = stripHTML(det).split(' - ')[0]
|
||||
text = stripHTML(det).replace(heading+' - ','')
|
||||
if 'Author' in heading:
|
||||
|
|
|
|||
Loading…
Reference in a new issue