more py2/py3 fixes

Jim Miller 2018-08-01 19:12:13 -05:00
parent 3e98844d33
commit 101ef13956
5 changed files with 13 additions and 7 deletions


@@ -19,6 +19,7 @@ from __future__ import absolute_import
import re
# py2 vs py3 transition
from ..six import text_type as unicode
+from ..six import ensure_binary
from ..six.moves.urllib import parse as urlparse
from ..six.moves.urllib.error import HTTPError
@@ -87,7 +88,7 @@ class FanficHuAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
soup = self._customized_fetch_url(self.url + '&i=1')
-if soup.title.string.encode(_SOURCE_CODE_ENCODING).strip(' :') == 'írta':
+if soup.title.string.encode(_SOURCE_CODE_ENCODING).strip(b' :') == 'írta':
raise exceptions.StoryDoesNotExist(self.url)
chapter_options = soup.find('form', action='viewstory.php').select('option')
@@ -143,7 +144,7 @@ class FanficHuAdapter(BaseSiteAdapter):
while index < len(cells):
cell = cells[index]
-key = cell.b.string.encode(_SOURCE_CODE_ENCODING).strip(':')
+key = cell.b.string.encode(_SOURCE_CODE_ENCODING).strip(b':')
try:
value = cells[index+1].string.encode(_SOURCE_CODE_ENCODING)
except AttributeError:
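
Note on the strip() change above (not part of the diff): once soup.title.string has been passed through .encode(), the value is bytes under Python 3, and bytes.strip() only accepts a bytes argument. A minimal sketch of the difference, with a made-up title and codec:

    # Illustration only; _SOURCE_CODE_ENCODING is assumed to name a codec
    # such as 'iso-8859-1'.
    title = u'A History: '
    encoded = title.encode('iso-8859-1')   # str on py2, bytes on py3
    encoded.strip(b' :')     # works on both py2 and py3
    # encoded.strip(' :')    # py2: fine; py3: TypeError: a bytes-like
    #                        # object is required, not 'str'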


@@ -346,7 +346,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
chapter_description = '<p><b>Description:</b> %s</p><hr />' % chapter_description
fullhtml += self.getPageText(raw_page, url)
if pages:
-for page_no in xrange(2, len(page_nums) + 1):
+for page_no in range(2, len(page_nums) + 1):
page_url = url + "?page=%s" % page_no
logger.debug("page_url= %s" % page_url)
raw_page = self._fetchUrl(page_url)
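
Note on the loop change above (not part of the diff): xrange() was removed in Python 3, and range() exists on both versions (it is itself lazy on py3), so the follow-up pages are fetched the same way. A tiny sketch with a made-up page list:

    # Illustration only: follow-up page numbers for a three-page story.
    page_nums = ['1', '2', '3']
    for page_no in range(2, len(page_nums) + 1):
        print("?page=%s" % page_no)   # prints ?page=2 then ?page=3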


@@ -20,11 +20,14 @@ import bs4
import datetime
import logging
import re
+from itertools import takewhile
from ..htmlcleanup import removeEntities, stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError
+from ..six.moves import zip as izip
from .base_adapter import BaseSiteAdapter, makeDate
@@ -703,7 +706,6 @@ def _getLargestCommonPrefix(*args):
"""Returns largest common prefix of all unicode arguments, ignoring case.
:rtype : unicode
"""
-from itertools import takewhile, izip
toLower = lambda xs: map(lambda x: x.lower(), xs)
allSame = lambda xs: len(set(toLower(xs))) == 1
return u''.join([i[0] for i in takewhile(allSame, izip(*args))])
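
Note on the import changes above (not part of the diff): itertools.izip no longer exists in Python 3 (the built-in zip is already lazy there), so the old function-local import failed under py3; hoisting takewhile to module level and aliasing six.moves.zip as izip leaves the body of _getLargestCommonPrefix untouched. A rough standalone sketch of the same prefix logic, using the pip-installed six package rather than the project's vendored ..six:

    # Illustration only: case-insensitive largest common prefix.
    from itertools import takewhile
    from six.moves import zip as izip   # itertools.izip on py2, zip on py3

    def common_prefix(*args):
        all_same = lambda chars: len(set(c.lower() for c in chars)) == 1
        return u''.join(chars[0] for chars in takewhile(all_same, izip(*args)))

    common_prefix(u'Chapter One', u'chapter two')   # returns u'Chapter '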


@@ -23,6 +23,9 @@ logger = logging.getLogger(__name__)
from .. import exceptions
# py2 vs py3 transition
+from ..six import ensure_text
from .base_adapter import BaseSiteAdapter, makeDate
class TestSiteAdapter(BaseSiteAdapter):
@@ -68,13 +71,13 @@ class TestSiteAdapter(BaseSiteAdapter):
#print("addList:%s"%(nkey))
for val in self.get_config_list(sections,key):
#print("addList:%s->%s"%(nkey,val))
-self.story.addToList(nkey,val.decode('utf-8').replace('{{storyId}}',idstr))
+self.story.addToList(nkey,ensure_text(val).replace('{{storyId}}',idstr))
else:
# Special cases:
if key in ['datePublished','dateUpdated']:
self.story.setMetadata(key,makeDate(self.get_config(sections,key),"%Y-%m-%d"))
else:
-self.story.setMetadata(key,self.get_config(sections,key).decode('utf-8').replace('{{storyId}}',idstr))
+self.story.setMetadata(key,ensure_text(self.get_config(sections,key)).replace('{{storyId}}',idstr))
#print("set:%s->%s"%(key,self.story.getMetadata(key)))
if self.has_config(sections,'chapter_urls'):
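
Note on the ensure_text() change above (not part of the diff): six.ensure_text() returns text whether it is handed bytes or an already-decoded string, while the old .decode('utf-8') call broke on Python 3, where str has no .decode() method. A sketch, assuming a six version that provides ensure_text (1.12+):

    # Illustration only: ensure_text() copes with either input type.
    from six import ensure_text
    ensure_text(b'Test Story {{storyId}}')   # -> u'Test Story {{storyId}}'
    ensure_text(u'Test Story {{storyId}}')   # already text, returned as-is;
                                             # .decode('utf-8') would raise
                                             # AttributeError here on py3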


@@ -1030,7 +1030,7 @@ class Configuration(configparser.SafeConfigParser):
if 'Accept' not in headers:
headers['Accept']="text/html,*/*"
req = Request(url,
-data=urlencode(parameters),
+data=ensure_binary(urlencode(parameters)),
headers=headers)
## Specific UA because too many sites are blocking the default python UA.
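
Note on the ensure_binary() change above (not part of the diff): on Python 3 urlencode() returns str, but the data= argument of a Request must be bytes; ensure_binary() encodes only when needed, so the same call still works on Python 2, where urlencode() already returns a byte string. A standalone sketch with a made-up URL and parameters, again using the pip-installed six:

    # Illustration only: the POST body must be bytes on py3.
    from six import ensure_binary
    from six.moves.urllib.parse import urlencode
    from six.moves.urllib.request import Request

    parameters = {'login': 'name', 'password': 'secret'}
    req = Request('http://example.com/post',
                  data=ensure_binary(urlencode(parameters)),  # bytes on py2 and py3
                  headers={'Accept': 'text/html,*/*'})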