mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-27 19:25:26 +01:00
Working towards python 2.7 & 3 cross compatibility.
This commit is contained in:
parent
611e6cecf2
commit
ac3b288f3b
6 changed files with 17 additions and 16 deletions
|
|
@ -25,7 +25,7 @@ from functools import partial
|
|||
import traceback
|
||||
import copy
|
||||
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from ..htmlheuristics import replace_br_with_p
|
||||
|
|
@ -397,7 +397,7 @@ class BaseSiteAdapter(Configurable):
|
|||
if isinstance(svalue,basestring):
|
||||
# bs4/html5lib add html, header and body tags, which
|
||||
# we don't want. utf8FromSoup will strip the body tags for us.
|
||||
svalue = bs4.BeautifulSoup(svalue,"html5lib").body
|
||||
svalue = BeautifulSoup(svalue,"html5lib").body
|
||||
self.story.setMetadata('description',self.utf8FromSoup(url,svalue))
|
||||
else:
|
||||
self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
|
@ -546,8 +546,8 @@ class BaseSiteAdapter(Configurable):
|
|||
|
||||
## soup and re-soup because BS4/html5lib is more forgiving of
|
||||
## incorrectly nested tags that way.
|
||||
soup = bs4.BeautifulSoup(data,'html5lib')
|
||||
soup = bs4.BeautifulSoup(unicode(soup),'html5lib')
|
||||
soup = BeautifulSoup(data,'html5lib')
|
||||
soup = BeautifulSoup(unicode(soup),'html5lib')
|
||||
|
||||
for ns in soup.find_all('fff_hide_noscript'):
|
||||
ns.name = 'noscript'
|
||||
|
|
|
|||
|
|
@ -81,10 +81,10 @@ def removeEntities(text, space_only=False):
|
|||
|
||||
try:
|
||||
t = text.decode('utf-8')
|
||||
except (UnicodeEncodeError,UnicodeDecodeError), e:
|
||||
except (UnicodeEncodeError,UnicodeDecodeError) as e:
|
||||
try:
|
||||
t = text.encode ('ascii', 'xmlcharrefreplace')
|
||||
except (UnicodeEncodeError,UnicodeDecodeError), e:
|
||||
except (UnicodeEncodeError,UnicodeDecodeError) as e:
|
||||
t = text
|
||||
text = t
|
||||
# replace numeric versions of [&<>] with named versions,
|
||||
|
|
@ -106,7 +106,7 @@ def removeEntities(text, space_only=False):
|
|||
continue
|
||||
try:
|
||||
text = text.replace(e, v)
|
||||
except UnicodeDecodeError, ex:
|
||||
except UnicodeDecodeError as ex:
|
||||
# for the pound symbol in constants.py
|
||||
text = text.replace(e, v.decode('utf-8'))
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import codecs
|
|||
import bs4 as bs
|
||||
import HtmlTagStack as stack
|
||||
|
||||
from . import exceptions as exceptions
|
||||
import exceptions
|
||||
|
||||
def logdebug(s):
|
||||
# uncomment for debug output
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import logging
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from html import HtmlProcessor
|
||||
from mobihtml import HtmlProcessor
|
||||
|
||||
# http://wiki.mobileread.com/wiki/MOBI
|
||||
# http://membres.lycos.fr/microfirst/palm/pdb.html
|
||||
|
|
|
|||
|
|
@ -5,9 +5,11 @@
|
|||
|
||||
import re
|
||||
import sys
|
||||
import StringIO
|
||||
import urllib
|
||||
from six import StringIO
|
||||
from six.moves import urllib
|
||||
|
||||
# import bs4
|
||||
# BeautifulSoup = bs4.BeautifulSoup
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
class HtmlProcessor:
|
||||
|
|
@ -18,7 +18,7 @@
|
|||
import os, re
|
||||
import copy
|
||||
from collections import defaultdict
|
||||
import urlparse
|
||||
from six.moves.urllib.parse import urlparse
|
||||
import string
|
||||
import json
|
||||
import datetime
|
||||
|
|
@ -26,7 +26,6 @@ from math import floor
|
|||
from functools import partial
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import urlparse as up
|
||||
|
||||
import bs4
|
||||
|
||||
|
|
@ -147,7 +146,7 @@ except:
|
|||
|
||||
## also used for explicit no image processing.
|
||||
def no_convert_image(url,data):
|
||||
parsedUrl = up.urlparse(url)
|
||||
parsedUrl = urlparse.urlparse(url)
|
||||
|
||||
ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower()
|
||||
|
||||
|
|
@ -1184,7 +1183,7 @@ class Story(Configurable):
|
|||
else:
|
||||
try:
|
||||
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
|
||||
grayscale = self.getConfig('grayscale_images')
|
||||
imgtype = self.getConfig('convert_images_to')
|
||||
|
|
@ -1201,7 +1200,7 @@ class Story(Configurable):
|
|||
removetrans,
|
||||
imgtype,
|
||||
background="#"+self.getConfig('background_color'))
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
logger.info("Failed to load or convert image, \nparent:%s\nskipping:%s\nException: %s"%(parenturl,imgurl,e))
|
||||
return ("failedtoload","failedtoload")
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue