From bb7bd51d94fc59dbf6a5f6837f8e34d07319d6d9 Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Fri, 19 Apr 2019 11:24:34 +0200 Subject: [PATCH] Some more DREI stuff --- extra/safe2bin/safe2bin.py | 11 ++-- extra/shutils/pyflakes.sh | 2 +- lib/controller/controller.py | 2 +- lib/core/common.py | 41 ++++++++++----- lib/core/dump.py | 8 +-- lib/core/option.py | 2 +- lib/core/settings.py | 2 +- lib/core/target.py | 5 +- lib/request/basic.py | 11 ++-- lib/request/comparison.py | 10 ++-- lib/request/inject.py | 2 +- lib/utils/crawler.py | 3 +- plugins/dbms/sqlite/syntax.py | 4 +- thirdparty/beautifulsoup/beautifulsoup.py | 61 +++++++++++------------ thirdparty/multipart/multipartpost.py | 1 - 15 files changed, 94 insertions(+), 71 deletions(-) diff --git a/extra/safe2bin/safe2bin.py b/extra/safe2bin/safe2bin.py index 6ccd9c1fb..eb0f360cd 100644 --- a/extra/safe2bin/safe2bin.py +++ b/extra/safe2bin/safe2bin.py @@ -20,6 +20,9 @@ from optparse import OptionParser if sys.version_info >= (3, 0): xrange = range + text_type = str +else: + text_type = unicode # Regex used for recognition of hex encoded characters HEX_ENCODED_CHAR_REGEX = r"(?P\\x[0-9A-Fa-f]{2})" @@ -52,14 +55,14 @@ def safecharencode(value): retVal = value if isinstance(value, basestring): - if any([_ not in SAFE_CHARS for _ in value]): + if any(_ not in SAFE_CHARS for _ in value): retVal = retVal.replace(HEX_ENCODED_PREFIX, HEX_ENCODED_PREFIX_MARKER) retVal = retVal.replace('\\', SLASH_MARKER) for char in SAFE_ENCODE_SLASH_REPLACEMENTS: retVal = retVal.replace(char, repr(char).strip('\'')) - retVal = reduce(lambda x, y: x + (y if (y in string.printable or isinstance(value, unicode) and ord(y) >= 160) else '\\x%02x' % ord(y)), retVal, (unicode if isinstance(value, unicode) else str)()) + retVal = reduce(lambda x, y: x + (y if (y in string.printable or isinstance(value, text_type) and ord(y) >= 160) else '\\x%02x' % ord(y)), retVal, type(value)()) retVal = retVal.replace(SLASH_MARKER, "\\\\") retVal = retVal.replace(HEX_ENCODED_PREFIX_MARKER, HEX_ENCODED_PREFIX) @@ -81,7 +84,7 @@ def safechardecode(value, binary=False): while True: match = re.search(HEX_ENCODED_CHAR_REGEX, retVal) if match: - retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, unicode) else chr)(ord(binascii.unhexlify(match.group("result").lstrip("\\x"))))) + retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, text_type) else chr)(ord(binascii.unhexlify(match.group("result").lstrip("\\x"))))) else: break @@ -91,7 +94,7 @@ def safechardecode(value, binary=False): retVal = retVal.replace(SLASH_MARKER, '\\') if binary: - if isinstance(retVal, unicode): + if isinstance(retVal, text_type): retVal = retVal.encode("utf8") elif isinstance(value, (list, tuple)): diff --git a/extra/shutils/pyflakes.sh b/extra/shutils/pyflakes.sh index ac3cfd8c5..0938d5e08 100755 --- a/extra/shutils/pyflakes.sh +++ b/extra/shutils/pyflakes.sh @@ -4,4 +4,4 @@ # See the file 'LICENSE' for copying permission # Runs pyflakes on all python files (prerequisite: apt-get install pyflakes) -find . -wholename "./thirdparty" -prune -o -type f -iname "*.py" -exec pyflakes '{}' \; +find . 
-wholename "./thirdparty" -prune -o -type f -iname "*.py" -exec pyflakes '{}' \; | grep -v "redefines '_'" diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 68bd04ecd..7b5227713 100644 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -333,7 +333,7 @@ def start(): testSqlInj = False - if PLACE.GET in conf.parameters and not any([conf.data, conf.testParameter]): + if PLACE.GET in conf.parameters and not any((conf.data, conf.testParameter)): for parameter in re.findall(r"([^=]+)=([^%s]+%s?|\Z)" % (re.escape(conf.paramDel or "") or DEFAULT_GET_POST_DELIMITER, re.escape(conf.paramDel or "") or DEFAULT_GET_POST_DELIMITER), conf.parameters[PLACE.GET]): paramKey = (conf.hostname, conf.path, PLACE.GET, parameter[0]) diff --git a/lib/core/common.py b/lib/core/common.py index 1aaa999ca..592bfa123 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -882,6 +882,16 @@ def singleTimeLogMessage(message, level=logging.INFO, flag=None): logger.log(level, message) def boldifyMessage(message): + """ + Sets ANSI bold marking on entire message if parts found in predefined BOLD_PATTERNS + + >>> boldifyMessage("Hello World") + 'Hello World' + + >>> boldifyMessage("GET parameter id is not injectable") + '\\x1b[1mGET parameter id is not injectable\\x1b[0m' + """ + retVal = message if any(_ in message for _ in BOLD_PATTERNS): @@ -890,6 +900,13 @@ def boldifyMessage(message): return retVal def setColor(message, color=None, bold=False, level=None): + """ + Sets ANSI color codes + + >>> setColor("Hello World", "red") + '\\x1b[31mHello World\\x1b[0m' + """ + retVal = message level = level or extractRegexResult(r"\[(?P%s)\]" % '|'.join(_[0] for _ in getPublicTypeMembers(LOGGING_LEVELS)), message) @@ -933,7 +950,7 @@ def dataToStdout(data, forceOutput=False, bold=False, content_type=None, status= if multiThreadMode: logging._acquireLock() - if isinstance(data, unicode): + if isinstance(data, six.text_type): message = stdoutencode(data) else: message = data @@ -1840,7 +1857,7 @@ def safeFilepathEncode(filepath): retVal = filepath - if filepath and isinstance(filepath, unicode): + if filepath and isinstance(filepath, six.text_type): retVal = filepath.encode(sys.getfilesystemencoding() or UNICODE_ENCODING) return retVal @@ -1927,7 +1944,7 @@ def getFilteredPageContent(page, onlyText=True, split=" "): retVal = page # only if the page's charset has been successfully identified - if isinstance(page, unicode): + if isinstance(page, six.text_type): retVal = re.sub(r"(?si)||%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), split, page) retVal = re.sub(r"%s{2,}" % split, split, retVal) retVal = htmlunescape(retVal.strip().strip(split)) @@ -1945,7 +1962,7 @@ def getPageWordSet(page): retVal = set() # only if the page's charset has been successfully identified - if isinstance(page, unicode): + if isinstance(page, six.text_type): retVal = set(_.group(0) for _ in re.finditer(r"\w+", getFilteredPageContent(page))) return retVal @@ -2430,7 +2447,7 @@ def getUnicode(value, encoding=None, noneToNull=False): except UnicodeDecodeError: return six.text_type(str(value), errors="ignore") # encoding ignored for non-basestring instances -def getBytes(value, encoding=UNICODE_ENCODING): +def getBytes(value, encoding=UNICODE_ENCODING, errors="strict"): """ Returns byte representation of provided Unicode value @@ -2445,11 +2462,11 @@ def getBytes(value, encoding=UNICODE_ENCODING): for char in xrange(0xF0000, 0xF00FF + 1): value = value.replace(unichr(char), "%s%02x" % (SAFE_HEX_MARKER, char 
- 0xF0000)) - retVal = value.encode(encoding) + retVal = value.encode(encoding, errors) retVal = re.sub(r"%s([0-9a-f]{2})" % SAFE_HEX_MARKER, lambda _: _.group(1).decode("hex"), retVal) else: - retVal = value.encode(encoding) + retVal = value.encode(encoding, errors) retVal = re.sub(r"\\x([0-9a-f]{2})", lambda _: _.group(1).decode("hex"), retVal) return retVal @@ -3694,7 +3711,7 @@ def removeReflectiveValues(content, payload, suppressWarning=False): retVal = content try: - if all((content, payload)) and isinstance(content, unicode) and kb.reflectiveMechanism and not kb.heuristicMode: + if all((content, payload)) and isinstance(content, six.text_type) and kb.reflectiveMechanism and not kb.heuristicMode: def _(value): while 2 * REFLECTED_REPLACEMENT_REGEX in value: value = value.replace(2 * REFLECTED_REPLACEMENT_REGEX, REFLECTED_REPLACEMENT_REGEX) @@ -3786,7 +3803,7 @@ def normalizeUnicode(value): 'sucuraj' """ - return unicodedata.normalize("NFKD", value).encode("ascii", "ignore") if isinstance(value, unicode) else value + return unicodedata.normalize("NFKD", value).encode("ascii", "ignore") if isinstance(value, six.text_type) else value def safeSQLIdentificatorNaming(name, isTable=False): """ @@ -4105,7 +4122,7 @@ def asciifyUrl(url, forceQuote=False): # _urllib.parse.quote(s.replace('%', '')) != s.replace('%', '') # which would trigger on all %-characters, e.g. "&". if getUnicode(s).encode("ascii", "replace") != s or forceQuote: - return _urllib.parse.quote(s.encode(UNICODE_ENCODING) if isinstance(s, unicode) else s, safe=safe) + return _urllib.parse.quote(s.encode(UNICODE_ENCODING) if isinstance(s, six.text_type) else s, safe=safe) return s username = quote(parts.username, '') @@ -4459,8 +4476,8 @@ def decodeHexValue(value, raw=False): retVal = retVal.decode("utf-16-be") except UnicodeDecodeError: pass - if not isinstance(retVal, unicode): - retVal = getUnicode(retVal, conf.encoding or "utf8") + if not isinstance(retVal, six.text_type): + retVal = getUnicode(retVal, conf.encoding or UNICODE_ENCODING) return retVal diff --git a/lib/core/dump.py b/lib/core/dump.py index e20edd14b..aa7cd7b77 100644 --- a/lib/core/dump.py +++ b/lib/core/dump.py @@ -242,7 +242,7 @@ class Dump(object): if table and isListLike(table): table = table[0] - maxlength = max(maxlength, len(unsafeSQLIdentificatorNaming(normalizeUnicode(table) or unicode(table)))) + maxlength = max(maxlength, len(unsafeSQLIdentificatorNaming(normalizeUnicode(table) or getUnicode(table)))) lines = "-" * (int(maxlength) + 2) @@ -263,7 +263,7 @@ class Dump(object): table = table[0] table = unsafeSQLIdentificatorNaming(table) - blank = " " * (maxlength - len(normalizeUnicode(table) or unicode(table))) + blank = " " * (maxlength - len(normalizeUnicode(table) or getUnicode(table))) self._write("| %s%s |" % (table, blank)) self._write("+%s+\n" % lines) @@ -358,7 +358,7 @@ class Dump(object): for ctables in dbTables.values(): for tables in ctables.values(): for table in tables: - maxlength1 = max(maxlength1, len(normalizeUnicode(table) or unicode(table))) + maxlength1 = max(maxlength1, len(normalizeUnicode(table) or getUnicode(table))) for db, counts in dbTables.items(): self._write("Database: %s" % unsafeSQLIdentificatorNaming(db) if db else "Current database") @@ -384,7 +384,7 @@ class Dump(object): tables.sort(key=lambda _: _.lower() if hasattr(_, "lower") else _) for table in tables: - blank1 = " " * (maxlength1 - len(normalizeUnicode(table) or unicode(table))) + blank1 = " " * (maxlength1 - len(normalizeUnicode(table) or 
getUnicode(table))) blank2 = " " * (maxlength2 - len(str(count))) self._write("| %s%s | %d%s |" % (table, blank1, count, blank2)) diff --git a/lib/core/option.py b/lib/core/option.py index 6d2561381..1c53c07d3 100644 --- a/lib/core/option.py +++ b/lib/core/option.py @@ -1716,7 +1716,7 @@ def _cleanupOptions(): except re.error: conf.csrfToken = re.escape(conf.csrfToken) finally: - class _(unicode): + class _(six.text_type): pass conf.csrfToken = _(conf.csrfToken) conf.csrfToken._original = original diff --git a/lib/core/settings.py b/lib/core/settings.py index f3f2ddcc6..678102025 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -17,7 +17,7 @@ from lib.core.enums import DBMS_DIRECTORY_NAME from lib.core.enums import OS # sqlmap version (...) -VERSION = "1.3.4.28" +VERSION = "1.3.4.29" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) diff --git a/lib/core/target.py b/lib/core/target.py index c6522f490..995fe95ea 100644 --- a/lib/core/target.py +++ b/lib/core/target.py @@ -73,6 +73,7 @@ from lib.core.settings import URI_INJECTABLE_REGEX from lib.core.settings import USER_AGENT_ALIASES from lib.core.settings import XML_RECOGNITION_REGEX from lib.utils.hashdb import HashDB +from thirdparty import six from thirdparty.odict import OrderedDict from thirdparty.six.moves import urllib as _urllib @@ -409,7 +410,7 @@ def _setRequestParams(): message += "Do you want sqlmap to automatically update it in further requests? [y/N] " if readInput(message, default='N', boolean=True): - class _(unicode): + class _(six.text_type): pass conf.csrfToken = _(re.escape(getUnicode(parameter))) conf.csrfToken._original = getUnicode(parameter) @@ -712,7 +713,7 @@ def initTargetEnv(): _setDBMS() if conf.data: - class _(unicode): + class _(six.text_type): pass kb.postUrlEncode = True diff --git a/lib/request/basic.py b/lib/request/basic.py index 2f112d6ed..cd2a66cbb 100644 --- a/lib/request/basic.py +++ b/lib/request/basic.py @@ -17,6 +17,7 @@ from lib.core.common import Backend from lib.core.common import extractErrorMessage from lib.core.common import extractRegexResult from lib.core.common import filterNone +from lib.core.common import getBytes from lib.core.common import getPublicTypeMembers from lib.core.common import getSafeExString from lib.core.common import getUnicode @@ -42,11 +43,11 @@ from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE from lib.core.settings import META_CHARSET_REGEX from lib.core.settings import PARSE_HEADERS_LIMIT from lib.core.settings import SELECT_FROM_TABLE_REGEX -from lib.core.settings import UNICODE_ENCODING from lib.core.settings import VIEWSTATE_REGEX from lib.parse.headers import headersParser from lib.parse.html import htmlParser from lib.utils.htmlentities import htmlEntities +from thirdparty import six from thirdparty.chardet import detect from thirdparty.odict import OrderedDict @@ -219,13 +220,13 @@ def checkCharEncoding(encoding, warn=True): # Reference: http://www.iana.org/assignments/character-sets # Reference: http://docs.python.org/library/codecs.html try: - codecs.lookup(encoding.encode(UNICODE_ENCODING) if isinstance(encoding, unicode) else encoding) - except (LookupError, ValueError): + codecs.lookup(encoding) + except: encoding = None if encoding: try: - unicode(randomStr(), encoding) + 
six.text_type(getBytes(randomStr()), encoding) except: if warn: warnMsg = "invalid web page charset '%s'" % encoding @@ -313,7 +314,7 @@ def decodePage(page, contentEncoding, contentType): kb.pageEncoding = conf.encoding # can't do for all responses because we need to support binary files too - if not isinstance(page, unicode) and "text/" in contentType: + if isinstance(page, six.binary_type) and "text/" in contentType: # e.g. Ãëàâà if "&#" in page: page = re.sub(r"&#x([0-9a-f]{1,2});", lambda _: (_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)).decode("hex"), page) diff --git a/lib/request/comparison.py b/lib/request/comparison.py index 9b8a9cfae..3fc717d74 100644 --- a/lib/request/comparison.py +++ b/lib/request/comparison.py @@ -8,6 +8,7 @@ See the file 'LICENSE' for copying permission import re from lib.core.common import extractRegexResult +from lib.core.common import getBytes from lib.core.common import getFilteredPageContent from lib.core.common import listToStrValue from lib.core.common import removeDynamicContent @@ -28,6 +29,7 @@ from lib.core.settings import LOWER_RATIO_BOUND from lib.core.settings import UPPER_RATIO_BOUND from lib.core.settings import URI_HTTP_HEADER from lib.core.threads import getCurrentThreadData +from thirdparty import six def comparison(page, headers, code=None, getRatioValue=False, pageLength=None): _ = _adjust(_comparison(page, headers, code, getRatioValue, pageLength), getRatioValue) @@ -105,10 +107,10 @@ def _comparison(page, headers, code, getRatioValue, pageLength): else: # Preventing "Unicode equal comparison failed to convert both arguments to Unicode" # (e.g. if one page is PDF and the other is HTML) - if isinstance(seqMatcher.a, str) and isinstance(page, unicode): - page = page.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore") - elif isinstance(seqMatcher.a, unicode) and isinstance(page, str): - seqMatcher.a = seqMatcher.a.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore") + if isinstance(seqMatcher.a, six.binary_type) and isinstance(page, six.text_type): + page = getBytes(page, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore") + elif isinstance(seqMatcher.a, six.text_type) and isinstance(page, six.binary_type): + seqMatcher.a = getBytes(seqMatcher.a, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore") if any(_ is None for _ in (page, seqMatcher.a)): return None diff --git a/lib/request/inject.py b/lib/request/inject.py index eddab9b7f..0715f54ee 100644 --- a/lib/request/inject.py +++ b/lib/request/inject.py @@ -486,7 +486,7 @@ def getValue(expression, blind=True, union=True, error=True, time=True, fromUser singleTimeWarnMessage(warnMsg) # Dirty patch (safe-encoded unicode characters) - if isinstance(value, unicode) and "\\x" in value: + if isinstance(value, six.text_type) and "\\x" in value: try: candidate = eval(repr(value).replace("\\\\x", "\\x").replace("u'", "'", 1)).decode(conf.encoding or UNICODE_ENCODING) if "\\x" not in candidate: diff --git a/lib/utils/crawler.py b/lib/utils/crawler.py index 7a86a947d..8f104928c 100644 --- a/lib/utils/crawler.py +++ b/lib/utils/crawler.py @@ -32,6 +32,7 @@ from lib.core.threads import getCurrentThreadData from lib.core.threads import runThreads from lib.parse.sitemap import parseSitemap from lib.request.connect import Connect as Request +from thirdparty import six from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup from thirdparty.six.moves import http_client as _http_client from thirdparty.six.moves import urllib as _urllib @@ -79,7 +80,7 @@ def crawl(target): 
if not kb.threadContinue: break - if isinstance(content, unicode): + if isinstance(content, six.text_type): try: match = re.search(r"(?si)]*>(.+)", content) if match: diff --git a/plugins/dbms/sqlite/syntax.py b/plugins/dbms/sqlite/syntax.py index 5a39528e1..09cc7cc7c 100644 --- a/plugins/dbms/sqlite/syntax.py +++ b/plugins/dbms/sqlite/syntax.py @@ -7,8 +7,8 @@ See the file 'LICENSE' for copying permission import binascii +from lib.core.common import getBytes from lib.core.common import isDBMSVersionAtLeast -from lib.core.settings import UNICODE_ENCODING from plugins.generic.syntax import Syntax as GenericSyntax class Syntax(GenericSyntax): @@ -28,7 +28,7 @@ class Syntax(GenericSyntax): def escaper(value): # Reference: http://stackoverflow.com/questions/3444335/how-do-i-quote-a-utf-8-string-literal-in-sqlite3 - return "CAST(X'%s' AS TEXT)" % binascii.hexlify(value.encode(UNICODE_ENCODING) if isinstance(value, unicode) else value) + return "CAST(X'%s' AS TEXT)" % binascii.hexlify(getBytes(value)) retVal = expression diff --git a/thirdparty/beautifulsoup/beautifulsoup.py b/thirdparty/beautifulsoup/beautifulsoup.py index c3734494f..c02e1c72e 100644 --- a/thirdparty/beautifulsoup/beautifulsoup.py +++ b/thirdparty/beautifulsoup/beautifulsoup.py @@ -91,6 +91,11 @@ import sys if sys.version_info >= (3, 0): xrange = range + text_type = str + binary_type = bytes +else: + text_type = unicode + binary_type = str try: from htmlentitydefs import name2codepoint @@ -434,19 +439,13 @@ class PageElement(object): def toEncoding(self, s, encoding=None): """Encodes an object to a string in some encoding, or to Unicode. .""" - if isinstance(s, unicode): + if isinstance(s, text_type): if encoding: s = s.encode(encoding) - elif isinstance(s, str): - if encoding: - s = s.encode(encoding) - else: - s = unicode(s) + elif isinstance(s, binary_type): + s = s.encode(encoding or "utf8") else: - if encoding: - s = self.toEncoding(str(s), encoding) - else: - s = unicode(s) + s = self.toEncoding(str(s), encoding or "utf8") return s BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" @@ -459,7 +458,7 @@ class PageElement(object): return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" -class NavigableString(unicode, PageElement): +class NavigableString(text_type, PageElement): def __new__(cls, value): """Create a new NavigableString. @@ -469,9 +468,9 @@ class NavigableString(unicode, PageElement): passed in to the superclass's __new__ or the superclass won't know how to handle non-ASCII characters. """ - if isinstance(value, unicode): - return unicode.__new__(cls, value) - return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + if isinstance(value, text_type): + return text_type.__new__(cls, value) + return text_type.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) def __getnewargs__(self): return (NavigableString.__str__(self),) @@ -1006,7 +1005,7 @@ class SoupStrainer: if isinstance(markup, Tag): markup = markup.name if markup and not isinstance(markup, basestring): - markup = unicode(markup) + markup = text_type(markup) #Now we know that chunk is either a string, or None. if hasattr(matchAgainst, 'match'): # It's a regexp object. 
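Nearly every hunk in this patch leans on the same Python 2/3 compatibility idiom that safe2bin.py and beautifulsoup.py now define at module level: bind text_type/binary_type once, then branch on those instead of the Python-2-only unicode/str pair. A minimal standalone sketch of that idiom (illustrative only, not sqlmap code):

    import sys

    # Same shim the patch adds to extra/safe2bin/safe2bin.py and
    # thirdparty/beautifulsoup/beautifulsoup.py: resolve the text/binary
    # string types once, then branch on them everywhere afterwards
    if sys.version_info >= (3, 0):
        text_type = str
        binary_type = bytes
    else:
        text_type = unicode        # only defined on Python 2
        binary_type = str

    def describe(value):
        # Works unchanged on both interpreters
        if isinstance(value, text_type):
            return "text, %d characters" % len(value)
        elif isinstance(value, binary_type):
            return "binary, %d bytes" % len(value)
        else:
            return "other (%s)" % type(value).__name__

    print(describe(u"G\u00e1va"))    # text, 4 characters
    print(describe(b"\xc3\xa1"))     # binary, 2 bytes

The else branch never executes on Python 3, so the bare unicode name there is harmless; that is what lets the patched modules keep a single code path for both interpreters. The lib/* changes use six.text_type/six.binary_type for the same purpose instead of a local shim.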
@@ -1016,8 +1015,8 @@ class SoupStrainer: elif hasattr(matchAgainst, 'items'): result = markup.has_key(matchAgainst) elif matchAgainst and isinstance(markup, basestring): - if isinstance(markup, unicode): - matchAgainst = unicode(matchAgainst) + if isinstance(markup, text_type): + matchAgainst = text_type(matchAgainst) else: matchAgainst = str(matchAgainst) @@ -1181,7 +1180,7 @@ class BeautifulStoneSoup(Tag, sgmllib.SGMLParser): def _feed(self, inDocumentEncoding=None, isHTML=False): # Convert the document to Unicode. markup = self.markup - if isinstance(markup, unicode): + if isinstance(markup, text_type): if not hasattr(self, 'originalEncoding'): self.originalEncoding = None else: @@ -1792,9 +1791,9 @@ class UnicodeDammit: self._detectEncoding(markup, isHTML) self.smartQuotesTo = smartQuotesTo self.triedEncodings = [] - if markup == '' or isinstance(markup, unicode): + if markup == '' or isinstance(markup, text_type): self.originalEncoding = None - self.unicode = unicode(markup) + self.unicode = text_type(markup) return u = None @@ -1807,7 +1806,7 @@ class UnicodeDammit: if u: break # If no luck and we have auto-detection library, try that: - if not u and chardet and not isinstance(self.markup, unicode): + if not u and chardet and not isinstance(self.markup, text_type): u = self._convertFrom(chardet.detect(self.markup)['encoding']) # As a last resort, try utf-8 and windows-1252: @@ -1880,7 +1879,7 @@ class UnicodeDammit: elif data[:4] == '\xff\xfe\x00\x00': encoding = 'utf-32le' data = data[4:] - newdata = unicode(data, encoding) + newdata = text_type(data, encoding) return newdata def _detectEncoding(self, xml_data, isHTML=False): @@ -1893,41 +1892,41 @@ class UnicodeDammit: elif xml_data[:4] == '\x00\x3c\x00\x3f': # UTF-16BE sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + xml_data = text_type(xml_data, 'utf-16be').encode('utf-8') elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ and (xml_data[2:4] != '\x00\x00'): # UTF-16BE with BOM sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + xml_data = text_type(xml_data[2:], 'utf-16be').encode('utf-8') elif xml_data[:4] == '\x3c\x00\x3f\x00': # UTF-16LE sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + xml_data = text_type(xml_data, 'utf-16le').encode('utf-8') elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ (xml_data[2:4] != '\x00\x00'): # UTF-16LE with BOM sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + xml_data = text_type(xml_data[2:], 'utf-16le').encode('utf-8') elif xml_data[:4] == '\x00\x00\x00\x3c': # UTF-32BE sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + xml_data = text_type(xml_data, 'utf-32be').encode('utf-8') elif xml_data[:4] == '\x3c\x00\x00\x00': # UTF-32LE sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + xml_data = text_type(xml_data, 'utf-32le').encode('utf-8') elif xml_data[:4] == '\x00\x00\xfe\xff': # UTF-32BE with BOM sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + xml_data = text_type(xml_data[4:], 'utf-32be').encode('utf-8') elif xml_data[:4] == '\xff\xfe\x00\x00': # UTF-32LE with BOM sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + xml_data = text_type(xml_data[4:], 'utf-32le').encode('utf-8') elif xml_data[:3] == 
'\xef\xbb\xbf': # UTF-8 with BOM sniffed_xml_encoding = 'utf-8' - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + xml_data = text_type(xml_data[3:], 'utf-8').encode('utf-8') else: sniffed_xml_encoding = 'ascii' pass diff --git a/thirdparty/multipart/multipartpost.py b/thirdparty/multipart/multipartpost.py index 61a3144a6..b2ff6c850 100644 --- a/thirdparty/multipart/multipartpost.py +++ b/thirdparty/multipart/multipartpost.py @@ -21,7 +21,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """ import io -import mimetools import mimetypes import os import stat
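The UnicodeDammit changes in the last beautifulsoup.py hunks only swap unicode for the shimmed text_type; the underlying byte-order-mark sniffing is unchanged. A simplified standalone sketch of that detection step, assuming the input is a raw bytes object (sniff_bom is an illustrative name, not BeautifulSoup's API, and the real _detectEncoding additionally re-encodes the payload to UTF-8 as the hunks above show):

    def sniff_bom(data):
        # Longer BOMs first: the UTF-32LE BOM (ff fe 00 00) starts with
        # the UTF-16LE BOM (ff fe), so the order of checks matters
        if data[:4] == b"\xff\xfe\x00\x00":
            return "utf-32le", data[4:]
        elif data[:4] == b"\x00\x00\xfe\xff":
            return "utf-32be", data[4:]
        elif data[:3] == b"\xef\xbb\xbf":
            return "utf-8", data[3:]
        elif data[:2] == b"\xff\xfe":
            return "utf-16le", data[2:]
        elif data[:2] == b"\xfe\xff":
            return "utf-16be", data[2:]
        return "ascii", data

    encoding, rest = sniff_bom(b"\xef\xbb\xbftest")
    print(encoding, rest.decode(encoding))    # utf-8 test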