Working towards Python 2.7 & 3 cross-compatibility.

This commit is contained in:
Jim Miller 2018-07-26 12:29:01 -05:00
parent 611e6cecf2
commit ac3b288f3b
6 changed files with 17 additions and 16 deletions

View file

@@ -25,7 +25,7 @@ from functools import partial
import traceback
import copy
import bs4
from bs4 import BeautifulSoup
from ..htmlcleanup import stripHTML
from ..htmlheuristics import replace_br_with_p
@@ -397,7 +397,7 @@ class BaseSiteAdapter(Configurable):
if isinstance(svalue,basestring):
# bs4/html5lib add html, header and body tags, which
# we don't want. utf8FromSoup will strip the body tags for us.
svalue = bs4.BeautifulSoup(svalue,"html5lib").body
svalue = BeautifulSoup(svalue,"html5lib").body
self.story.setMetadata('description',self.utf8FromSoup(url,svalue))
else:
self.story.setMetadata('description',stripHTML(svalue))
@@ -546,8 +546,8 @@ class BaseSiteAdapter(Configurable):
## soup and re-soup because BS4/html5lib is more forgiving of
## incorrectly nested tags that way.
soup = bs4.BeautifulSoup(data,'html5lib')
soup = bs4.BeautifulSoup(unicode(soup),'html5lib')
soup = BeautifulSoup(data,'html5lib')
soup = BeautifulSoup(unicode(soup),'html5lib')
for ns in soup.find_all('fff_hide_noscript'):
ns.name = 'noscript'

View file

@@ -81,10 +81,10 @@ def removeEntities(text, space_only=False):
try:
t = text.decode('utf-8')
except (UnicodeEncodeError,UnicodeDecodeError), e:
except (UnicodeEncodeError,UnicodeDecodeError) as e:
try:
t = text.encode ('ascii', 'xmlcharrefreplace')
except (UnicodeEncodeError,UnicodeDecodeError), e:
except (UnicodeEncodeError,UnicodeDecodeError) as e:
t = text
text = t
# replace numeric versions of [&<>] with named versions,
@@ -106,7 +106,7 @@ def removeEntities(text, space_only=False):
continue
try:
text = text.replace(e, v)
except UnicodeDecodeError, ex:
except UnicodeDecodeError as ex:
# for the pound symbol in constants.py
text = text.replace(e, v.decode('utf-8'))

View file

@@ -22,7 +22,7 @@ import codecs
import bs4 as bs
import HtmlTagStack as stack
from . import exceptions as exceptions
import exceptions
def logdebug(s):
# uncomment for debug output

View file

@@ -10,7 +10,7 @@ import logging
logger = logging.getLogger(__name__)
from html import HtmlProcessor
from mobihtml import HtmlProcessor
# http://wiki.mobileread.com/wiki/MOBI
# http://membres.lycos.fr/microfirst/palm/pdb.html

View file

@@ -5,9 +5,11 @@
import re
import sys
import StringIO
import urllib
from six import StringIO
from six.moves import urllib
# import bs4
# BeautifulSoup = bs4.BeautifulSoup
from bs4 import BeautifulSoup
class HtmlProcessor:

View file

@@ -18,7 +18,7 @@
import os, re
import copy
from collections import defaultdict
import urlparse
from six.moves.urllib.parse import urlparse
import string
import json
import datetime
@@ -26,7 +26,6 @@ from math import floor
from functools import partial
import logging
logger = logging.getLogger(__name__)
import urlparse as up
import bs4
@@ -147,7 +146,7 @@ except:
## also used for explicit no image processing.
def no_convert_image(url,data):
parsedUrl = up.urlparse(url)
parsedUrl = urlparse.urlparse(url)
ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower()
@@ -1184,7 +1183,7 @@ class Story(Configurable):
else:
try:
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
except Exception, e:
except Exception as e:
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
grayscale = self.getConfig('grayscale_images')
imgtype = self.getConfig('convert_images_to')
@@ -1201,7 +1200,7 @@ class Story(Configurable):
removetrans,
imgtype,
background="#"+self.getConfig('background_color'))
except Exception, e:
except Exception as e:
logger.info("Failed to load or convert image, \nparent:%s\nskipping:%s\nException: %s"%(parenturl,imgurl,e))
return ("failedtoload","failedtoload")