From c0459faa4398e77211507d6a92b4696ef9555b0a Mon Sep 17 00:00:00 2001 From: sigizmund Date: Tue, 15 Dec 2009 15:23:48 +0000 Subject: [PATCH 01/94] added --- BeautifulSoup.py | 1711 ++++++++++++++++++++++++++++++++++++++++++++++ constants.py | 135 ++++ downaloder.py | 74 ++ ffa.py | 187 +++++ fictionalley.py | 75 ++ ficwad.py | 97 +++ output.py | 136 ++++ 7 files changed, 2415 insertions(+) create mode 100644 BeautifulSoup.py create mode 100644 constants.py create mode 100644 downaloder.py create mode 100644 ffa.py create mode 100644 fictionalley.py create mode 100644 ficwad.py create mode 100644 output.py diff --git a/BeautifulSoup.py b/BeautifulSoup.py new file mode 100644 index 00000000..458f08a1 --- /dev/null +++ b/BeautifulSoup.py @@ -0,0 +1,1711 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +v3.0.0 +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. 
+ + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.0.0" +__date__ = "$Date: 2004/10/18 00:14:20 $" +__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson" +__license__ = "PSF" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import types +import re +import sgmllib +from htmlentitydefs import name2codepoint + +#This code makes Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +# First, the classes that represent markup elements. + +class PageElement: + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.contents.index(self) + if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent: + # We're replacing this element with one of its siblings. 
+ index = self.parent.contents.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + self.parent.contents.remove(self) + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if (isinstance(newChild, basestring) + or isinstance(newChild, unicode)) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent != None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent == self: + index = self.find(newChild) + if index and index < position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. 
+ position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before after Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + 
**kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + else: + # Build a SoupStrainer + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. 
+ .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return __str__(self, None) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." 
+ + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs == None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. 
+ self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. 
+ + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isString(val): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + if '"' in val: + fmt = "%s='%s'" + # This can't happen naturally, but it can happen + # if you modify an attribute value and print it out. + if "'" in val: + val = val.replace("'", "&squot;") + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. 
If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this + Tag matching the given criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findAllChildren = findAll + + #Utility methods + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.contents.append(tag) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + for i in range(0, len(self.contents)): + yield self.contents[i] + raise StopIteration + + def recursiveChildGenerator(self): + stack = [(self, 0)] + while stack: + tag, start = stack.pop() + if isinstance(tag, Tag): + for i in range(start, len(tag.contents)): + a = tag.contents[i] + yield a + if isinstance(a, Tag) and tag.contents: + if i < len(tag.contents) - 1: + stack.append((tag, i+1)) + stack.append((a, 0)) + break + raise StopIteration + +# Next, a couple classes to represent queries and their results. 
class SoupStrainer:
    """Encapsulates a number of ways of matching a markup element (tag or
    text).

    A strainer holds three criteria:

    * name  -- matched against a tag name (or, if callable and the strainer
               is given a bare name rather than a Tag, called with the
               name and attribute list).
    * attrs -- a map of attribute name -> criterion; keyword arguments
               passed to the constructor are merged into it.
    * text  -- matched against strings.

    Each criterion is interpreted by _matches().
    """

    def __init__(self, name=None, attrs={}, text=None, **kwargs):
        """Stores the search criteria.

        The attrs default is never mutated: it is copied before the
        keyword arguments are merged in, so the caller's map (and the
        shared default) stay untouched.
        """
        self.name = name
        self.attrs = attrs.copy()
        self.attrs.update(kwargs)
        self.text = text

    def __str__(self):
        """Returns the text criterion if one is set, otherwise
        'name|attrs'."""
        if self.text:
            return self.text
        else:
            return "%s|%s" % (self.name, self.attrs)

    def searchTag(self, markupName=None, markupAttrs={}):
        """Checks a tag against the name and attribute criteria.

        markupName is either a Tag object or a bare tag name; in the
        latter case markupAttrs supplies the attributes (a map-like
        object with get(), or an iterable of (key, value) pairs).

        Returns the Tag (or the bare name, when no Tag was given) if it
        matches, and None otherwise.
        """
        found = None
        markup = None
        if isinstance(markupName, Tag):
            markup = markupName
            # A Tag offers get(), so it can stand in for its own
            # attribute map below.
            markupAttrs = markup
        callFunctionWithTagData = callable(self.name) \
                                  and not isinstance(markupName, Tag)

        if (not self.name) \
               or callFunctionWithTagData \
               or (markup and self._matches(markup, self.name)) \
               or (not markup and self._matches(markupName, self.name)):
            if callFunctionWithTagData:
                match = self.name(markupName, markupAttrs)
            else:
                match = True
                # Built lazily, and only once: None (rather than
                # falsiness) marks "not built yet", so a tag with no
                # attributes does not rebuild an empty map every time
                # around the loop.
                markupAttrMap = None
                for attr, matchAgainst in self.attrs.items():
                    if markupAttrMap is None:
                        if hasattr(markupAttrs, 'get'):
                            markupAttrMap = markupAttrs
                        else:
                            markupAttrMap = {}
                            for k, v in markupAttrs:
                                markupAttrMap[k] = v
                    attrValue = markupAttrMap.get(attr)
                    if not self._matches(attrValue, matchAgainst):
                        match = False
                        break
            if match:
                if markup:
                    found = markup
                else:
                    found = markupName
        return found

    def search(self, markup):
        """Checks an arbitrary piece of markup against this strainer.

        * A list-like object (that is not a Tag) is scanned for the first
          NavigableString element that matches.
        * A Tag is checked with searchTag(), unless a text criterion is
          set, in which case tags never match.
        * A string is checked against the text criterion.

        Returns the matching object, or None. Raises Exception for any
        other kind of object.
        """
        found = None
        # If given a list of items, scan it for a text element that
        # matches.
        if isList(markup) and not isinstance(markup, Tag):
            for element in markup:
                if isinstance(element, NavigableString) \
                       and self.search(element):
                    found = element
                    break
        # If it's a Tag, make sure its name or attributes match.
        # Don't bother with Tags if we're searching for text.
        elif isinstance(markup, Tag):
            if not self.text:
                found = self.searchTag(markup)
        # If it's text, make sure the text matches.
        elif isinstance(markup, NavigableString) or \
                 isString(markup):
            if self._matches(markup, self.text):
                found = markup
        else:
            # Call form of raise: same exception and message, and valid
            # syntax on every Python version.
            raise Exception("I don't know how to match against a %s"
                            % markup.__class__)
        return found

    def _matches(self, markup, matchAgainst):
        """Decides whether markup satisfies the criterion matchAgainst.

        matchAgainst may be:

        * the boolean True   -- matches anything that is not None;
        * a callable         -- called with markup, its result is returned;
        * an object with a 'match' attribute (a compiled regexp) --
                                its search() is run against markup;
        * a list-like object -- markup must be one of its items;
        * a map-like object  -- markup must be one of its keys;
        * anything else      -- compared to markup for equality, after
                                being converted to markup's string type.

        For every non-callable criterion a Tag is first reduced to its
        name, and a non-string value is converted to unicode.
        """
        result = False
        # Only the real boolean True is the "anything goes" wildcard; a
        # value that merely compares equal to True (such as 1) is not.
        if matchAgainst is True:
            result = markup is not None
        elif callable(matchAgainst):
            result = matchAgainst(markup)
        else:
            #Custom match methods take the tag as an argument, but all
            #other ways of matching match the tag name as a string.
            if isinstance(markup, Tag):
                markup = markup.name
            if markup and not isString(markup):
                markup = unicode(markup)
            #Now we know that markup is either a string, or None.
            if hasattr(matchAgainst, 'match'):
                # It's a regexp object.
                result = markup and matchAgainst.search(markup)
            elif isList(matchAgainst):
                result = markup in matchAgainst
            elif hasattr(matchAgainst, 'items'):
                # It's a map: the markup has to be one of its keys.
                # (markup is a string or None here, so the lookup must
                # be made on the map, not on the markup.)
                result = matchAgainst.has_key(markup)
            elif matchAgainst and isString(markup):
                # Bring the criterion to the same string type as the
                # markup so the equality test below compares like
                # with like.
                if isinstance(markup, unicode):
                    matchAgainst = unicode(matchAgainst)
                else:
                    matchAgainst = str(matchAgainst)

            if not result:
                result = matchAgainst == markup
        return result

class ResultSet(list):
    """A ResultSet is just a list that keeps track of the SoupStrainer
    that created it."""
    def __init__(self, source):
        # Initialize this list itself (not a throw-away temporary).
        list.__init__(self)
        self.source = source

# Now, some helper functions.
+ +def isList(l): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is listlike.""" + return hasattr(l, '__iter__') \ + or (type(l) in (types.ListType, types.TupleType)) + +def isString(s): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is stringlike.""" + try: + return isinstance(s, unicode) or isintance(s, basestring) + except NameError: + return isinstance(s, str) + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif isList(portion): + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] 
+ + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + XML_ENTITY_LIST = {} + for i in ["quot", "apos", "amp", "lt", "gt"]: + XML_ENTITY_LIST[i] = True + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile(']*)>'), + lambda x: '') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
(No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed() + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def _feed(self, inDocumentEncoding=None): + # Convert the document to Unicode. + markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + if markup: + if self.markupMassage: + if not isList(self.markupMassage): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. 
+ self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.find('start_') == 0 or methodName.find('end_') == 0 \ + or methodName.find('do_') == 0: + return SGMLParser.__getattr__(self, methodName) + elif methodName.find('__') != 0: + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + # Tags with just one string-owning child get the child as a + # 'string' property, so that soup.tag.string is shorthand for + # soup.tag.contents[0] + if len(self.currentTag.contents) == 1 and \ + isinstance(self.currentTag.contents[0], NavigableString): + self.currentTag.string = self.currentTag.contents[0] + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = ''.join(self.currentData) + if not currentData.strip(): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and 
len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

FooBar

should pop to 'p', not 'b'. +

FooBar

should pop to 'table', not 'p'. +

Foo

Bar

should pop to 'tr', not 'p'. +

FooBar

should pop to 'p', not 'b'. + +

    • *
    • * should pop to 'ul', not the first 'li'. +
  • ** should pop to 'table', not the first 'tr' + tag should + implicitly close the previous tag within the same
    ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers != None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers == None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s" % name + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print " is not real!" 
% name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = "xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.convertEntities in [self.HTML_ENTITIES, + self.XML_ENTITIES]: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertEntities == self.HTML_ENTITIES or \ + (self.convertEntities == self.XML_ENTITIES and \ + self.XML_ENTITY_LIST.get(ref)): + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + if not data: + data = '&%s;' % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. 
Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

    tag should implicitly close the previous

    tag. + +

    Para1

    Para2 + should be transformed into: +

    Para1

    Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a

    tag should _not_ implicitly close the previous +
    tag. + + Alice said:
    Bob said:
    Blah + should NOT be transformed into: + Alice said:
    Bob said:
    Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a
    , + but not close a tag in another table. + +
    BlahBlah + should be transformed into: +
    BlahBlah + but, + Blah
    Blah + should NOT be transformed into + Blah
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ['br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base']) + + QUOTE_TAGS = {'script': None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center'] + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. 
+ RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)") + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if getattr(self, 'declaredHTMLEncoding') or \ + (self.originalEncoding == self.fromEncoding): + # This is our second pass through the document, or + # else an encoding was specified explicitly and it + # worked. Rewrite the meta tag. + newAttr = self.CHARSET_RE.sub\ + (lambda(match):match.group(1) + + "%SOUP-ENCODING%", value) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the new information. 
+ newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + tag = self.unknown_starttag("meta", attrs) + if tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. 
This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big'] + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + + + + + + + + +
    +

    + FanFiction Downloader +

    + + +
    +
    + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
    + +
    + Ebook format   +
    + +
    + +
    + + + +
    + + + +
    +
    + +

    + Login and Password +

    +
    + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide your credentials to download it; otherwise, just leave these fields empty +
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + + +
    + + +
    + +
    +
    + A few things to know, which will make your life substantially easier: +
      +
    1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
    2. +
    3. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com
    4. +
    5. Paste a URL of the first chapter of the fanfic, not the index page
    6. +
    7. Fics with a single chapter are not supported (you can just copy and paste them)
    8. +
    9. Stories which are too long may not be downloaded correctly, and the application will report a time-out error — this is a limitation currently imposed by Google App Engine on long-running activities
    10. +
    11. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me
    12. +
    13. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section, but in the future they will be deleted after 5 days to save space
    14. +
    15. If the Downloader simply opens a download file window rather than saving the fanfic and giving you a link, it means the file is too large to save in the database and you need to download it straight away
    16. +
    17. If you think that something that should work in fact doesn't, drop me an email at sigizmund@gmail.com
    18. +
    + Otherwise, just have fun, and if you want to say thank you — use the email above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..4987804d --- /dev/null +++ b/index.html @@ -0,0 +1,189 @@ + + + + + Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com to epub and HTML to Stanza, Kindle, Nook, Sony Reader + + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {{yourfile}} + + + {% if authorized %} +
    +
    +
    + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier. +
      +
    • + For fictionalley.org, please paste the URL of the story's chapter list in the box, such as + this. Or the story text URL for + fictionalley.org one-shots, such + as this. +
    • +
    • + For all other supported sites, please paste the URL of the first chapter in the box. For + one-shots, the first chapter is the whole story. +
    • +
    • + Alternatively, see your personal list of previously downloaded fanfics. +
    • +
    +
    +
    + {{ error_message }} +
    + +
    + +
    +
    Ebook format
    +
    + EPub + HTML + Plain Text +
    +
    + +

    Login and Password

    +
    + + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide + your credentials to download it; otherwise, just leave these fields empty +
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + +
    + + {% else %} +
    +
    +

    + This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you + can start downloading fanfics, you need to log in, so the downloader can remember your fanfics and store them. +

    +

    Login using Google account

    +
    +
    + {% endif %} + +
    + A few things to know, which will make your life substantially easier: +
      +
    1. + First thing to know: I do not use your login and password. In fact, all I know about your account is your ID – the password + is verified by Google and is absolutely, totally unknown to anyone but you. +
    2. +
    3. + Small post written by me + — how to read fiction in Stanza or any other ebook reader. +
    4. +
    5. + Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, and twilighted.net. + (fanficauthors.net withdrawn as they offer native ePub functionality now.) +
    6. +
    7. + You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader. +
    8. +
    9. + Paste a URL of the first chapter of the fanfic, not the index page, except for fictionalley.org. +
    10. +
    11. + For fictionalley.org, you need to use the URL of the story's chapter list, such as + this. Or the story text URL for fictionalley.org + one-shots, such as this. +
    12. +
    13. + One-shots, fics with a single chapter, are now supported. +
    14. +
    15. + You can download fanfics and store them for 'later' by just downloading them and visiting the recent + downloads section. +
    16. +
    17. + Downloaded stories are deleted after some time (which should give you enough time to download them and will keep + Google happy about the app not going over the storage limit). +
    18. +
    19. + If the Downloader simply opens a download file window rather than saving the fanfic and giving you a link, it means the file is + too large to save in the database and you need to download it straight away. +
    20. +
    21. + If you see some funny characters in a downloaded Plain Text file, make sure you choose UTF-8 as the text file encoding and + not something else. +
    22. +
    23. + If you think that something that should work in fact doesn't, drop me an email + at sigizmund@gmail.com, or, even better, write an email to + our Google Group. I also encourage you to join it so + you will find out about the latest updates and fixes as soon as possible. +
    24. +
    + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    + +
    + + + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..bbed2dff --- /dev/null +++ b/index.yaml @@ -0,0 +1,22 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
    " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. 
Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. + undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? 
+ var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? 
+ + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." + name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? 
elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. 
+ push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). + var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, 
jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? 
index : 0, max = one ? index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? 
+ this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? 
[ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? 
+ jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? 
+ "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 
&& typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? + all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "
    ", "
    " ] || + + !tags.indexOf("", "" ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + + + + + {{yourfile}} + + +

    +
    + Hi, {{ nickname }}! These are the fanfics you've downloaded previously. +
    +
    + +
    + {% for fic in fics %} +

    {{ fic.name }} by {{ fic.author }} ({{ fic.format }})
    {{ fic.url }}

    + {% endfor %} +
    + + + + + + + + + + diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 00000000..d5b4d399 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + 
+Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(obj) + " is not JSON serializable") + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito ' + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). 
+ + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. 
+ + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(encoding)(fp)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. 
This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. 
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01003d4f81d37513d0f8a2a5fb857b8448ae2bd GIT binary patch literal 12071 zcmeHNL37+jc5aXoC52lM??!ES`^Va5uF#&l87#d{Ux!3 zn-|3n?q3p7OB|dN$$7DJUN}^KM;t7P z_xSsL5nU6}&*=IaadGDz>Vo#>kD8f3w86!7@%u|+hsD0O&EIev42oGpIC}l95x%f< zrIyx+#l_FX@3E|_XH|W`RXp2m??ckj2g^eIdi&8s>HRu*9&Cq2oR{*^@Rna)x_EB5coSj#}_YN%Byvr%iNvp!DC;7EE8?)~QTmG#@}@@5f9 z6~#tUrBx&Y>YT*;?9r*L2+zFO@cy?gJgjIku=it zI6O$yKw_vWQQDVVC9RKys3XiN4U*(oP6A929~HHpV;JbA9?3{Cv$KQAFtd$ioXRhc z%Q2d-`?tGtSe1<^-3qfw4jm7%gz{J(#^re0_!dvG>H9Gky|5|@m6pkIM~(yC((!&8 zkK!;$OPQ;J^_GT82GMie3ig%mO7&c&EIY&4m5$SWUR##amIR5s*P>;nyd(&aI#(*H zat-xANW(0m4#PmlVLi9Z*vB|lP;7`Fy|K}1N&LHe7p5`Ev!ayKJ)`|5?KCaejG}6i zYj4*bWtrQRFWg~JxEs>L@7E|l%u>~rYyL-Fx!yT>+Tp(LZX2!JXx&EZ_J-WW@7E}& zRg%=LpPoE*o00MYo5q9tX1w+uiP)p=M&`_o*Y~R2y=ra!<}J7G!=?7?JGgs$P20Wi zX!oKWVi{OuduV?H`aS7N4ITCm)Un=tTvW=8`=ZUYGp)JzNi&a8kxk@wiAC>kJ*qdN zE;p^>Ol~%eL#+kpb%I85+Dcc=8GuNYyJ!st{ z8`R+21;w0HWLzV4KEj*5=_9?0)o==6&jfOlQ&B&Q%x(N&GdP9*(Wn zUq*IYzTb{SY6J(`r$~{gBQFZe&IXU>`#zb1j7QS#*Y*9rOZJ0S^Npw>48JN;gr-K) zu8UKi(D6oxT{oTt`>wUMTDt9o`g&0QZTyAZ@)zxyDZmy>Y$yB_iAQM-mn0mQ>nE-; z+j;<_oc=h=4mPLjG|KnZe!2c^x(_z8K#vfXoH>s*e+|(CPC={w2y-hpZEGKgf_kw5 zox10_)Xj{;cKG@|d^=x8d&oUiy-yyN{pvo(2+o9CLPho6daF(~oY~7=H1kQxT{=iU z>DU~}TDwIMYb75a=juUGWQA9#yzsJ){H1IY#!0i%m?)4F+iWmQl#PrKF|T41LD$iD z4Rgbqf+{ID=htPF=mhd|eQ&^Hzw1I*0} zKSO$}^@JhP6u_e~QbSX{Pn 
zImHbun2iFO;RRRaXyw!L0$N!E4QW|4!u$CA3qJbK_FN%{Tkvir!S~ADQoIrCiB`{sg2xOP$a6!=E7X^)aNnkh(@~yZy zEC1+2@p4(*e|k-vTox}eHl$ppv7N8}UHlFc-}Xou`VUagwsjdw4hAsn0VoymI*xdT zzm?#61{TtB84N}_8hHWR@nGN|7C4YzXE0dN6%3+a+Z@G-T1nyqMGg)2+5rn8hqpC? zfZ6~ch6oyB3^AD$HBlUvxJ%Z7TR|y6_SjB#L0mQp7&_~?TDhn!KM>;Ms%#-y9n0V`o9<)gl;VvG->!xMNIJT&8OrK31S zV#Cg2TAWjiamz+40qitgN!31*BF_~DFV(&(?1|v%1w{dqSBaYNaI{&*QShWDYBHo8 zP`#(KQ5olx6D;f=%%CzsZY1&L=P8CFqGoQeDCqbjBPUCd{(&A4&}6C(nUncYz3~Kf zs%VmFqf?^11hbTeKu(~|n(#F8*cFond2oc2ep3Z-g$;?Aaz(gL;9?SVG_VUddlDT zC1Qh_Wrmp)aRm1QrC3e-_2OtFCJH&hh`f2d0Jyx!q)FkY*)?_G!TB`8{K!R=hk`JsN_&yRZ~ z?rVJX-_cd~s&mD0owjpr;j+_m-pBuC=lbFWvE+obaMD);`~D3DfTHCr1Ka{=8{-G) zFTfAb-wu9&wG6oX4GIlT`wWE!1KvadFwDTde?$Rj%=GyHg%+g0HxK)^8QnFKE$BOM zKp$=c^x@gYB11pGH9$XrK0rT!K7PnSLj&|-On^SdeY2p?0=hrLVzyrabg<4>0G)*n zb3V=Da&urA#E{vOXZ!2KWi~0oF}H~|HLjhaA9BhZU*72mP19r zQt2=t%t?EMM&#qDHV8qBGT71omI$hO zJ7ORE3JXl4EUbtMBAGy7#Xc@KYnjGDW+r$f&zuo%%I2a#Kg_mSYS;u)WQ`D7xsGCO zhW-VQ3JQ`+&JbF}pMc?|D{Fx2jCfER8b3M3F~Gyp02_%<=CFw+g@4kP=<>IQ4XJz3o7aY3LxcX7!n3JEHD|%d%5jwB8HTX zEc__%c=bX-Ozg8} z(jz|C!@(7t?B`QTUZF>?Se+8yyFvHpouVEQtG|dZqv`w?KAI*Wu3u%c*7z#&$?U5Z z;qxdDzQhT5qGj+@ra-i8u`UoT4}yxX`%wrGH@r;hiKV_*U_?O7)#3*3@tNb zToTz8bOtp81#`q5CURyFTLr#`ss&qRCS$8W;!w3{tITX{l}YEHvsIV^+B($yTj4a> zeCv1r15|z9?^4=;PYQDdI_@(^yGhc_xLdeYvR*c?H*>e(A@;08wi%}3LgX;s(C_ky z?0aG6ukpKY=5|>Uxn9oJ{}8v!lIn*G*6p71x-)KbR1tZ z^&43~xy3qq%(yFxO?cO#37Xx8d7p|GoZz-70r3Kfq2ky+mZD@i0R=d2yGy?Of&v{s z%Z4q%GRZibgf<%Uj&qvbODkk)%YuUonw<&(2nDbNW3Ti^Bjxbuc<~wxe4ytKoKIzW zb!@;?=+y)t{+?e^XpE$B&OgH1;o{$F<>>b#t{c)QqhGI_)ldh)!C*f8yxF2D<%(TK z95fr1(KBBHZN%B}NOwc7)DIrIa(ab_6!m+9=3Ny|TbZOd$NE#7YjVhNLh2|~d``(2 z;}kidI5K+*>!61ZjfWsqRnbeW0C>ip|3YD&g8S+COF7<$Uc*!7Q@h6Y3+656+B|Nj zxQZvvh_L?;(VcL{6%4Nb0T-g}Iyao_j^Qbnk))mdJoMf}6MjbD?;|Aj`;2Y+eVhMB znMg;!4+o8F;##<_kZ~_;mDK<*o7*x3R>d^E{VRGF%eaSL37V2UP9XP4Nj;iqufa$j zNvlbN??bUIMMVdWXMnUth%ajb-P5 E0OpUxq5uE@ literal 0 HcmV?d00001 diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 
00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; 
+ PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *skipkeys; + int fast_encode; + int allow_nan; +} PyEncoderObject; + +static PyMemberDef encoder_members[] = { + {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, + {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, + {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, + {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, + {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, + {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + {NULL} +}; + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); +void init_speedups(void); +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +scanner_dealloc(PyObject *self); +static int +scanner_clear(PyObject *self); +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +encoder_dealloc(PyObject *self); 
+static int +encoder_clear(PyObject *self); +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *const); +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) + +#define MIN_EXPANSION 6 +#ifdef Py_UNICODE_WIDE +#define MAX_EXPANSION (2 * MIN_EXPANSION) +#else +#define MAX_EXPANSION MIN_EXPANSION +#endif + +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) +{ + /* PyObject to Py_ssize_t converter */ + *size_ptr = PyInt_AsSsize_t(o); + if (*size_ptr == -1 && PyErr_Occurred()); + return 1; + return 0; +} + +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. 
output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + 
Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + 
input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + 
return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if (pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. 
+ encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': 
c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + 
raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = 
PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': 
case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." 
+); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } + else if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + 
assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip 
whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
+ + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + + /* if object_hook 
is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = 
idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON constant from PyString pystr. 
+ constant is the constant string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. + + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' 
followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyString_FromStringAndSize(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); + } + } + else { + /* parse as an int using a fast path if available, otherwise call user defined method */ + if (s->parse_int != (PyObject *)&PyInt_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + else { + rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); + } + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyUnicode pystr. 
+ idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. 
+ */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. 
Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return 
_parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = 
(PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding; a failed lookup (NULL) must skip the type checks and reach the bail test below */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (s->encoding != NULL && PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* 
PyObject_GenericSetAttr, */ /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->sort_keys = NULL; + s->skipkeys = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + + PyEncoderObject *s; + PyObject *allow_nan; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, + &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + return -1; + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->sort_keys); + Py_INCREF(s->skipkeys); + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == 
(PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + PyObject *rval; + Py_ssize_t indent_level; + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + rval = PyList_New(0); + if (rval == NULL) + return NULL; + if (encoder_listencode_obj(s, rval, obj, indent_level)) { + Py_DECREF(rval); + return NULL; + } + return rval; +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = PyString_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = PyString_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = PyString_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (i > 0) { + return PyString_FromString("Infinity"); + } + else if (i < 0) { + return PyString_FromString("-Infinity"); + } + else { + return 
PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + 
PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + 
newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast 
= NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ 
+ /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ 
+ encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column 
{2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
+ + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + # Content contains zero or more unescaped string characters + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows + if terminator == '"': + break + elif terminator != '\\': + if strict: + msg = "Invalid control character %r at" % (terminator,) + #msg = "Invalid control character {0!r} at".format(terminator) + raise ValueError(errmsg(msg, s, end)) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + # If not a unicode escape sequence, must be in the lookup table + if esc != 'u': + try: + char = _b[esc] + except KeyError: + msg = "Invalid \\escape: " + repr(esc) + raise ValueError(errmsg(msg, s, end)) + end += 1 + else: + # Unicode escape sequence + esc = s[end + 1:end + 5] + next_end = end + 5 + if len(esc) != 4: + msg = "Invalid \\uXXXX escape" + raise ValueError(errmsg(msg, s, end)) + uni = int(esc, 16) + # Check for surrogate pair on UCS-4 systems + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError(errmsg(msg, s, end)) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError(errmsg(msg, s, end)) + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + char = unichr(uni) + end = next_end + # Append the unescaped 
character + _append(char) + return u''.join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + +def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + pairs = {} + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty + nextchar = s[end:end + 1] + # Normally we expect nextchar == '"' + if nextchar != '"': + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + elif nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + while True: + key, end = scanstring(s, end, encoding, strict) + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". + if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + + if object_hook is not None: 
+ pairs = object_hook(pairs) + return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. 
+ + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. 
+ + """ + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.scan_once = make_scanner(self) + + def decode(self, s, _w=WHITESPACE.match): + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. 
+ + """ + try: + obj, end = self.scan_once(s, idx) + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ae9b3591ee9c6400d5cd09eb0a05999ef680bdc GIT binary patch literal 11292 zcmcIq&2tmkcE2sjACi&37_iM}!weOf0h^grg(*VjJp*RO4@U6XFwFBak=p8(-B{|D zyIWwr$eTbG*{8C4o2;@-Ro*I9S){Vde~_xAD%-5H%x3cYo!i}#{XArtI09eY&vVZ` z=iKvk^}GKnmp=LJZ`T?s`IPbfbNogBXei|K(wGQ9BBIomM%g zsO}crSyW1_%%C!%?rxlw9p;pSt)|pl;4v>toH8a%Vx}{N$}}rnGRq2~eZplA;DT$P?H3UqU?swTMdF{uF3{A%N4Zo6Ljgj~FYVy)e2 zxoy{v?br)^GiaK>SbtJ|;5Gus4NKnl_*5(4wu(=oXxqDPEo#`l?}oa!L_p|>;?VQA z;|Vl}0Epz|(Km*_`<&WE0Xgwd=G0q59RM<#;7(q>%B$d(GWr-RC=>F<|0C)z@@hf% zT|U$|hqb<;j5x2n0)+sRz^%=kIsn|9>Km$$GN&e*c^sK2RAUq6?S~J)xHa@ad#H8m z762->rtLSq#v6BQIr7NLS5e?EYplYhPA{GUj9Rl5*l{BOldw-TgRY2tQ^u4rZQL^E zjRwzEz#k7Ld|F2Z^dz6;R)d&tag+0Ej+E8x%Bi3Pc7fP=<)4!pmtg+Tl6ef04SCh1 z;Bk{`M1d*+w#>$HxqZyN8$s9C)PguU?!L?y(l|jwzB@rA)N6UZ>j%lAmLHrj-6$>G zq7k&tqnglb!6+N0+=9{IG01F6hR|ytl^7%wnM=2d3=!+kmbMrmx%$r;+T(Zvcpu)w#cPpfUu%#VsTeE0br_&zScdU=tb0+O+ zwqv;=7)iKC@}UJxPn?0goo*NcuTM~mCQ+!|8KayZg}k3Z84>rJYq&nX7x!YKk~Mfo zt9CUa>XoJIKU=K)`s(K9=Kif)H#he`Yrb6UQi88SbD@;_rpWZB)R(1yS?c3b-;(+% zsW+uwlDaSTveZMV7o{FYybRsAdGm_~4??0+wTh_y9N6&&&Wlvx24V>=B_(KECBg!Q zdH^wzt2hrS+W+qyg-rwu(s@u+{bCB?f~ZjN6r#o%JqYVKq5le1X(r{!7;2OlVKOq_)Dg+L}Q~!zXDTobN zK?9~>5%&c+Ptby$Jvc=<1Csqy=`m6y_pTU5AHXMww6XVxvRiL5nYM6HRxkWdRRN+$ z@8Ys}7aG>Vf$vh2n2s90sky`LJ>5635^Tj)KDu zsJ0|4q_Et$8~T`Q#$EU$38fNToaEqg1TG(@=3pnGq(B=`6J|nFbO{ayct>mZ+y}aC z&$mh2lz${OTu~6hGC7g6H)Z--LIrq2hfii+1IJ_0yj^jR(wUo@_IFHZK=>*&dtwu^ z@$Pr{BpmXlIY<2~itqo8#rK%~G5QYA-qdTNF1WT0%*~Ds=Z^kHL^o)&>rvCgIf7eE zFU?c`j)GQzzO$-U#LUj28gs)3s}EP6KYmtQt=@m~V6FOmr&K z1z+ZDhe5X!$s8~8#5ZiX(Bs(0_vrr1&>Fok5iqfkkKCSakQAZY3B~xg5I^M6;rX#e zJ1Q&N_I?UTkO$D6P&|iMf#+AmPY_OOl(Z{=eHlh@0r?5fnKY?w>94j7GWD(LvU zf`JqC&lys8dJwn3>uA8N%WPOF2>M&B$I7GY#i#%V0z_SKGiK~6RC`}n{U;Yq&vQOGBg2ROmu*XPY} 
z`pP(hQ}36_oP~r=cd=E5v0k1IT^z!K)E*||-rs1*FZ|PRRe&{oo-uJ%#0BXsFd|8? z(ZTX5o>sd^;LyND(&Pl*hf&v_4EzLe1L>09y%cZF@&x-ocn^xnVG_SkQa=fzN$O`@ zd`5A_1{BC7K^(>MHW>s4U93jDDF;Cd)9I<#I2#}sEUCU<0mHVAfmBY-S4;! zF^nCg5RjednZDh2WyT#bYJ(YfV6IzUo6bN)8#5RQwq5}W0$nIjD(@RdR^Aw_tWcwi zZnv(pM(ZP5^jX+-EiSf}*hST{9tWK@x*kkISwy$OZqfQnx2)T!TMS(#e1x;)*8+G# zl5Nr$)pmklR~v~pqPuHvBI}R1Oza)40?llcDS6TORPwuSPmBf}q^^6y%ANL??%orj z+Fzj+?W4%6qA{AEfzB;M51+yJENUk8yc#pk7Z9H!Mm2M#T;7;L?~HK`T6zSn1$;2XoBhJrE{ zU{GwdGDRj*w1xs20}WMR(g^k;nVc$Dt)XC1g9L~P{pJwRs8_)p&_dJA0qhPKPRew?=PY1weHrc9v#MaEM7nz1GMM{ zI#pUX$kly$?A5j&Vb*&tHJt%{sTWy$%;Fms*&UZ#U7?s+RTfWJ{E~&mVsI&mkKDS* zdd88vEWSl?!ncrl9^S&0FtmIBfkt!>gYx;Eae)2 zA|KL(AfpE;T)ade+(ijr1WXfI~(x*y^F)w7aeNjt21Q&BLFgZH|5_TlFn} z`bj<_ZT~cweix~f?9(@^NLFV}seA~YBFbwqavxCucH?b_FDXuB*6YX;*XuV>Npr_+ zF+o$WXSwBi-LxZtfs}_AA~7F^Zqx~U#=4j&jKL(W3~!ikb`UBvGZS><$a5q&qi?Bfdh}-}lO{DA$yzLN^s0m-IUPq!zB6*w&i3CZ~ zAX~+ZPdZ|DBPPTGin<$iLKo!qd!PocSG_LOBA>KUh!R!jP;13IAWfREFe{QJAJS%v zOQs24?bwm&2N@i<*@WW?(jv>1sla+Yxw}9!*(jkdx2|UCy9RPKNF%g*X5gbpkH7;V zxhGjMGwQ;rj&KU323OYaudSJHf^hdnhJe-@;AztgK!#p92gIvgK`dL4{-Mqa^RCrT;95r4d< zR5!X&9JDh)j*FL0582P(B*v=ZE^RMSLO|M15XOS*P3!4>v(W;b%pFJ$F3{p3;&f~< zK|gr@c;aZ@WSAT#j#isM14l?Gv}E4PjAtT_k%{!IhZ_tz0nQmLSzUp8uVE92=5Yi8 zVL)>&6^@OU_+AGT4IvFUi%#rP9+{Vxo`tJ)*iez-Tj8!kzZWk`g%!TT9~Un0c|b_hT4S+2k+u;=sH8fo@9b0IDQJ%D`Bh< zK?38Un_zv;Vfw+k6NHnLXzl)!>iV;l>a%rOQLAmcF@pYftp+`xPEM&O0l4*okoZ#& z4M}Sb3qJ(nGg#UyUYCUULsK>B7z5~-Kz@)R34zFMHFb>7V_2IkUb3jOVBCMg5dkw3 zwHn`jYqcu~fg`?Ct4l@XTBjwq${PPQm?2>DfQD9#?4O|mlZUJ&U%6oZU{_I*hfYMY zZ)p<^#;kdD023>t2=uu92$O$m4k;^{)z=Lfr@^E zLcg%-EF1q}A(4zJ_ zD3Ow1@tl3o0Jt#FTir*LVN!MJU&gR6wBOXUy42y~XXqOQi89YZl#~R7j1L7a1WM+g zEXOVK?-%4WQBxz=CI5ZRkRxPFX4EB2{u-6ug#n+z#v~IVTz%nRM!XH?ArpbOZhXK` z$U?#@$y$ zJN9cRI|`HU23D-lpJzf>%;Ujj5?O_+Bu69@S*tMz3w2Aetkv3q(`~Uyajn%r@mfu?p*qik z)PjCmd-3%eemVAZ<^HPZ{5F2vwId>Kam&Feq;?WNBeN*P`Sd8G7e;eMW41CiA-^NE zc8M2fgswdQSv*b5XUf5E(w3OD#jgfM|Hss~Lt`~Ku&#sG$brph3_Xd7Wx)>>;RDL! 
zqmU$_5!~E_Uf-@-_nxe;ikbTcFQRLY*Q%@XVEEw4vz5n>g;Wn8uY9#`@kenMACP7E zX%NALOSS55qbk2~(yyg|%1Pu#BWQQLmQFdZKeO=AdjIR^)km`1y_Nfq)*r8|e{GTA znpbVszGBfrA?9?zwHhFZCDn$LdM7cYq~iYr|Dtigy{pCROHoL7(J_jbKm5y0> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii + +class JSONEncoder(object): + """Extensible JSON encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + + """ + item_separator = ', ' + key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None): + """Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is false, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is true, the output is guaranteed to be str + objects with all incoming unicode characters escaped. 
If + ensure_ascii is false, the output will be unicode object. + + If check_circular is true, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is true, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is true, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a non-negative integer, then JSON array + elements and object members will be pretty-printed with that + indent level. An indent level of 0 will only insert newlines. + None is the most compact representation. + + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. 
+ + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. 
+ + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. + + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in 
markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for 
these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, (int, long)): + key = str(key) + elif _skipkeys: + continue + else: + raise TypeError("key " + repr(key) + " is not a string") + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, basestring): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, (int, long)): + yield str(value) + elif isinstance(value, float): + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, basestring): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, 
_current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] + + return _iterencode diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e59d372a7ab88749c287a7a2a77dfad41ebd616b GIT binary patch literal 13938 zcmcgz%WoVu6UVGPCtt?yCBhuPSIZfHJC2EJw=^C=d zVfU!JMb#3`xaN zKxe3_sjlZ&Rp0mf9@YBQzmHU2|K^)Jb(MZjryf^SIHD|k&>9s%L0O-t?Mu>rKs^|IUi(4QUR%m9E3Z#| zjp-^Xee9RVq3m%$9*5PcQYCLt6}+;#Qt~Ru3^6l|%m_21$c!;Fj?6J;jw5q|nF(Z0 zsw)ph)aF}Ck1B7HW8Qf5ntCv%^oa7ul|H8Q7)!k=W*U~7{2EJo@^9+CKCZk|Lc|HB zC)9(J$~&!|VY4QccSbVkQc^AKj-FBvipqOQ=~EoSdzsZ=EA_>L(@LLF8Wk@q{fg4x zQy5jhsjS#LGY=BjivMeT=BqeFl}sVgQeInjV;v`vAFR~exbFL=!7v)EyKA~;T1D|G6g*tq zym)bLar3Rl;^Oq;@?v8#Sd12vNAVacs}E85X#3$W7VWv;dNdOcnGToOq0x907x28e z7<2kX^Dv$8VcI+_OUltJd}MK!^Hg@xc&W$?`$Rm$iAh2 z%uk(DaOx7i*YSacf#GCtmF18)Ll(Ezkd?^3 zw3kgeR~S3o?IWg$yLEs)r#TysQI*Pt{9L&y*IBcK#OyHG3I`d^C@U!_zbFTbO)sg( zWfh)K77w8>J%k;l0!tB}QidohbWi$~KhWU`9M+UqBwKK{_!^RoEdu-6!u30M=E9)4 z^+GHXbEtOGn7brP;S;~YEPsp)i4;b~aP}fExl1SEa``SY7(|flrfqyWZcBA@g!iCOODgBW zJ`VR)WJ=cjU5L~f%+T6O;rsyTzlqwIt5c;ntP@swJRy8{X^%SV_~Ds+h;O`zL_@(r zvXFvxpq{}-qUfRfb$|8`qBbc1P>W1SJu6e|gZZAJBUF38WT4v7azH)9I77-YycWZ1 zP&YRd9mM`}QyU4pvYI5V%NH&X$Ige*%2hiBGrJK+8610V;u4eFb>O#-&l_M~Xt!Em zwb)KpHCh8MH=y|JaQU&Wqtp#NJK1XK*k0bUJ>78I&E&G_`R3fg&;GJE1^l-&5~vB% z&$gjFd$hNu<(uyv{Om8w#`OGpl6RNPc1m_unvQvp%(>^$+4SRN)=t{c3F#o)wi`w6 zRyJft=L?L9EuXd9flod+!)&LjF&p&7>GT7T*$jh~S-a5;UF^fQ9kkb%b@YE9^Ip`> z7M%{VZG&67%3hq#TW&M%8UW*N+l`*G7ax<*k*(7Fa_!A#r|d<>6t=q;+L|H-USIVg z3L(szdbkq!UurvqU__Zb%-V5#x!!bRBN<{m(7d!=8E0wfd@3W=nWZIrUBeFY>)M(z0X~hZcz?g`sSiE56(GzjPteGLn%$5#Oby5M%ibyA{SF^RtGg4If+UCw9CV zwws<^cd-Q&;<(+?kgU10vBTbJZXG7NPerYf(X41A~2rVt6M^COMB^|rAxp90qn+SA<-0GS%n?8 zlU7?6veI@VH%PPwtB3Y7227%S_6+n5!-AcBI7I7QR|a48RdlZ>Fs^MK*Ikm(L;rpf zIy;%sumOvl^JXKOIeQmUU)A-sTHTN8?WP;`EG4iB(ebQJ^v4hNQ@tH0;d-j_yhB!F zW|UE}Zn8%{cBC6R!tp`Dd2PCUowH2Bd?>g$PesbX5AYlUoCgu29mVu2s~f964rwD5 
z!!~5g4eYy5bp&kQ5CMaD@sRkrOFW_M&Vo72@d{;j0_QTe#p&qX4hVL*Q426;_n`*% zygNT@-!b%TnXQ=19>Mdn>U>&b5O}s5%bYm2dxW}Wz3M*ou_MTnS1C7k+|qR)4w69( zut2NngK1{-!Ilj()gg2^4epGv%z7AO1ox@CP>=jpV$+fKGE>ALrZM*-2%p)Fp8bHV zW$fYFpSjJpK__ymA>VA5XuSR63mc}E$#2g1fdFF4{c!%tG&nIg^0CwVcPPdXAQyHH z#`&qy1lScEXQSN|u8H&t3@17B1Q7yJ6o%vT)rjDmP0S#jGsb}DfNamrgdBS|_{Oz8 zcWW*Q=kUY5^7pOD4?J!yXmD{h!CatMa3W6u`Vin14Nwt;lG(&a$*IsL=wY@zRVC}1 z=0Uz}8$6vPTXQWiJ(n#9Nb2)J(&?M@nZYjC$ooQ&lrWZZ(d?mkda(X=l-PmZX!-%I z%j|qbU1JSYO~N3u1u)%uBL}@KpK_q~0Du}B<^tEjxJJ`q8(q6fOswUVvplrU<4iui z*mA<{IHve6=&&{wlv{!U;kVfupK2$Hk_Ey-=C^F=3aa0-f+>G#S(+7CE zzzP5a7^a1A!gH6IY9({pjdmc63W8uJfHLw=vx#FOc%?9%(qoT*o@WlwqzT+VCw`%wFUZfNwz(8gUhKS#gK$?t%2KJ%tZCcd3ik z`6}HxaZxj!D?Vtq0!v2Gq*oUubW3wQmM6l7Adk4SMns&G9Gu;IxilF;Q4ga^s41mW zsL57R6<43Ga;%#Tht8_=5|fviyuyS5PUlr5;(+I(5Ie82g2ADq4GRc;wHno5tp-Oo z4G)|zTb;iVYtkCQU!4XdzeRknvn^MEXq1WBATk5{Z3;iXgB(M)Y!|a-K`Ir6Da|wP{R_k#U&72DfE$O=#z)a62}Q$`&BrKK_y&^+Jz^-jVHKG5)fU; zTEVFj(F1sVZCQwSk_x0l3P6Y;Cj!Q;adaGFDJLnKxNuSVBWY0@{rxj6$?3TiFbKa- z!_#8O?*@8Z8jOkrOGikuO2BsHUV`9DWH1YHj@tAgj?id~yjpLse;ijP=ZRz-RYMQgAi$W0K1Xjgw-SRc!Hrj>nX!tA#)qR#G zd^R{$ev`=AB`RH-XTPeTwiD)oA+=AOI!sz!_E!u!hp3VDtvIZ&>E!%IgdSBHZ00aY z5)L7k@GF-!bI1@(1Y)rr*$tCi#Slr%4HLEtO$bnOnKv&cfQX-wH%gf)6X~GoR0|`2 zCA)xie#i#+9^u8`ASW@x5&aYLa)+uQ2PdpLv4iJID3^5-HLwEwBTIwjVbXw8gP#aH zf(L=sQVi}!@w6!ho~OkS5EKugSOQ?&3G7Mh47d>YTqQA;WP?&WNszk_+eJ#|<(dT{ z0I|w4w?8hUDh~q0+@ZU$xPiQ|tyd7>mtgTzy4K*SG(IH?H23l^+eUp7&^kW2!;KcU z7OWBjR3NQJUKL1#Vb~CJwgyL)d2UpKxaxOsMkWN1ME{H_!Zt!?Kl6Pt+`yW_~2_FXbZEOm0scYc6J*|c>xT7%=|{FwDuSxUsMb@jyK(Q&#% zA^kgi<2N{NrD9bI{n$aK61Ue{vF5PWT9xC#P66jU*#>GY)TE`sC8Yd$%fTx z>!H_f@{y~n)x5A?tC@}9mI$B7O@MPAk4A@bTCtX#cbFqN9X^rVQQYJ^G!$>)8&ie% zzua$`zp{1mQhB0$rq0(s_+>i+Sp2U@cJZ&Ec8hAaq;`kYZok?cP`iU_x2$$6YPV1A zj;P&HwR=qM9#^|3)b51ZJ*jpl)$Wwq#Vag$)x}c3tJLqI3#zQH(a<*^4YQa5F~x8; z=6Khz-D94jo5V3O@BctvPTSaX#=*NuMH%Om+B*d-p^(mtnM#50$Myr)KxRsU309vrkvHJ6IyDolA6_`4>9eD-f7c@VtHpxu;3obia3h2-Ep;t z7q0Twdswlt_@QlZiSjmvqFgvgi$`Kr2-uid3dOq#L!26*hS)t!^#nSL 
zA=WS%Xm8c&`LXXB5~>1%6x{5QEbL zsvoV4QaXX!&fg?VGMHtBKDa3+H)gRlTU4O1`tU2TMqbNd?KYU;P*@wz*qg8hArZSP zg|*=v*6>Q>eprJ{bzlu_56jKqzzZ;Ln3X8yk0m1>18mI?XAGRmn zxD5UR%QR)^g)(9RND;6>m*9^uBsv&KqiG8d3B^06ctVhl#qfPl+>l}}9H7Z!00@fj z=93hNyD3<4s>!aY@Kze&J_JHP`4<(u1nU>jftR}ky`WsCypb$Y2nZKUpAgHrAG4MS zHNYw%99ilrE(}XHi^V%%tOwq+3Nu*9r<9h3(!4ZXElUOZOj|%CSH)7|8b?Sc=w1}3 z!J@jRNgJ8%Wi__117&$~1V{FF`7dO)x}cl`<#-Tb*D| zq-mejQKa@uod}$WV@J3K(XN(W3jV*!)(uSjCzLG)AQ<*KR0O#(OJ%Q#&NWkyAav9A!X!83`|n^7BP$#5&1twW-nStC7(@AFvmX$9bO# zjdSM|61kNy!DxxuJf8)={q$wa3=3f7qjO*61g@A^*Hv07@jWE{)-WPu6L4RK7;)rp mVGM#ZW|bfaoxh>Ni0Ut#Ee>1#g>t!kdZaM<*Njmf_V?dj3=-%7 literal 0 HcmV?d00001 diff --git a/simplejson/scanner.py b/simplejson/scanner.py new file mode 100644 index 00000000..adbc6ec9 --- /dev/null +++ b/simplejson/scanner.py @@ -0,0 +1,65 @@ +"""JSON token scanner +""" +import re +try: + from simplejson._speedups import make_scanner as c_make_scanner +except ImportError: + c_make_scanner = None + +__all__ = ['make_scanner'] + +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) + +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + encoding = context.encoding + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook + + def _scan_once(string, idx): + try: + nextchar = string[idx] + except IndexError: + raise StopIteration + + if nextchar == '"': + return parse_string(string, idx + 1, encoding, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 
'true': + return True, idx + 4 + elif nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 + + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() + if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise StopIteration + + return _scan_once + +make_scanner = c_make_scanner or py_make_scanner diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30d94445f0a0c941ee46b6c4fa3bd255e662f6ef GIT binary patch literal 2340 zcmb_dUvC>l5T8B&cj6S1v}wr$X-SKd%5~uhP(>h$k&IGTvRgn&D(iB$PA@s%o$fXz z5y=AtVOzZO)Mvf`?|cM43}2x9X3kDsMZDG4&dtuw@3*rvb9;aNTPd%;dewAj{43%4 z6-F|IaEW#x6}c82DcVtVx+v2O9a-dOXeUeR``{L3b&d|p6jnX8C{7O5ZHEFAz? zAg#zNlA9ByB(hAKY@MOa3jk)x&C{>gutGXZ5r}n#b~3zmr&{2M79hUuJZY_%@JI(M ziDL(Wj?3O_{909oRWl3Gw~uspyx7K^k~N5GZKJyJ#ly4RPimh(-*ea3)~b6C_T2kx z8`WLic)nY^|9nHH4ioX1!N_1FzeAi6c|@aBQ8X%x#iCJ>OoqHHjk03N(I_WMo<=26 z3N*4rDbh%ZQle2_lroJnG|16F(ZHfXi3VjFWN83no(4pN0u74Po8grIhTRJ^EFc*c z;%PZ7ix_-l&P?(LET?l!e5UBuxkXYKLsNw@ihfaPVa_aOJ+vrXCN-4f0ET2Qq42{D zU1X^fye7qd8Sz_%UvW&&em#p)*I|i5OL5o+BD{!IPQGzefF z6A7CA^0ai@Er@-d6dG`B1h^A~DXjQEu+jvEl1#%sOJU`!uo>QM_7a9@5dw^|o5F#m zm@p#hbC9APku$HIr9^j}vIAKFOyA@i3eWG3{ zIxkRwH)e>o$Wl5#EAS>B(n%yi{GG-=Cow+6Js|1g*nk3Pu^1&KZ#*i zWPmpauP=+>*!RU|WNPALEz=&74Hp(Y+fOta8&f7~dHkB9=1}c)nG}o)>uL zR9tHw-){+v+GIg47gH8jSD!TEIE+mN(~b$FrqQu&yfBbpT4A6?dCF07DBnmZd1wc5 zcpN1Xg$~@R?9cYZ#9nY9cF#SLkOF;ToELU1A@vPkZeC#YfsTc|7u!zCa}voj)=8Cb zBLVBc30-F7Lqv9*=q|v9*V9?g4{c*6TRYQBb{yNM<4Y0|bc5smJ~m}+xPPb}(|r+! 
zM`rGl%L#+T*r4ZICZ$guC0}yOcEiBYQ|sw@tMI2}1ET&c(Q#5g@?y{T!9(P#W zX0gKJ9*esW>9>Z5Q%*a@zrvmZWcT(e3tKW%O|TMg;kttU^v-rjDngP~o6NYe`? n)!s$2k|Nk1^+WgA*I)9HlN%6uHH!vYlm;tVdFUTlrBD9@kb}!4 literal 0 HcmV?d00001 diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py new file mode 100644 index 00000000..17c97963 --- /dev/null +++ b/simplejson/tests/__init__.py @@ -0,0 +1,23 @@ +import unittest +import doctest + +def additional_tests(): + import simplejson + import simplejson.encoder + import simplejson.decoder + suite = unittest.TestSuite() + for mod in (simplejson, simplejson.encoder, simplejson.decoder): + suite.addTest(doctest.DocTestSuite(mod)) + suite.addTest(doctest.DocFileSuite('../../index.rst')) + return suite + +def main(): + suite = additional_tests() + runner = unittest.TextTestRunner() + runner.run(suite) + +if __name__ == '__main__': + import os + import sys + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + main() diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py new file mode 100644 index 00000000..af6463d6 --- /dev/null +++ b/simplejson/tests/test_check_circular.py @@ -0,0 +1,30 @@ +from unittest import TestCase +import simplejson as json + +def default_iterable(obj): + return list(obj) + +class TestCheckCircular(TestCase): + def test_circular_dict(self): + dct = {} + dct['a'] = dct + self.assertRaises(ValueError, json.dumps, dct) + + def test_circular_list(self): + lst = [] + lst.append(lst) + self.assertRaises(ValueError, json.dumps, lst) + + def test_circular_composite(self): + dct2 = {} + dct2['a'] = [] + dct2['a'].append(dct2) + self.assertRaises(ValueError, json.dumps, dct2) + + def test_circular_default(self): + json.dumps([set()], default=default_iterable) + self.assertRaises(TypeError, json.dumps, [set()]) + + def test_circular_off_default(self): + json.dumps([set()], default=default_iterable, check_circular=False) + 
self.assertRaises(TypeError, json.dumps, [set()], check_circular=False) diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py new file mode 100644 index 00000000..1cd701d4 --- /dev/null +++ b/simplejson/tests/test_decode.py @@ -0,0 +1,22 @@ +import decimal +from unittest import TestCase + +import simplejson as json + +class TestDecode(TestCase): + def test_decimal(self): + rval = json.loads('1.1', parse_float=decimal.Decimal) + self.assert_(isinstance(rval, decimal.Decimal)) + self.assertEquals(rval, decimal.Decimal('1.1')) + + def test_float(self): + rval = json.loads('1', parse_int=float) + self.assert_(isinstance(rval, float)) + self.assertEquals(rval, 1.0) + + def test_decoder_optimizations(self): + # Several optimizations were made that skip over calls to + # the whitespace regex, so this test is designed to try and + # exercise the uncommon cases. The array cases are already covered. + rval = json.loads('{ "key" : "value" , "k":"v" }') + self.assertEquals(rval, {"key":"value", "k":"v"}) diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py new file mode 100644 index 00000000..139e42bf --- /dev/null +++ b/simplejson/tests/test_default.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +import simplejson as json + +class TestDefault(TestCase): + def test_default(self): + self.assertEquals( + json.dumps(type, default=repr), + json.dumps(repr(type))) diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py new file mode 100644 index 00000000..4de37cf4 --- /dev/null +++ b/simplejson/tests/test_dump.py @@ -0,0 +1,21 @@ +from unittest import TestCase +from cStringIO import StringIO + +import simplejson as json + +class TestDump(TestCase): + def test_dump(self): + sio = StringIO() + json.dump({}, sio) + self.assertEquals(sio.getvalue(), '{}') + + def test_dumps(self): + self.assertEquals(json.dumps({}), '{}') + + def test_encode_truefalse(self): + self.assertEquals(json.dumps( + {True: False, 
False: True}, sort_keys=True), + '{"false": true, "true": false}') + self.assertEquals(json.dumps( + {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), + '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py new file mode 100644 index 00000000..7128495f --- /dev/null +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -0,0 +1,38 @@ +from unittest import TestCase + +import simplejson.encoder + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBaseStringAscii(TestCase): + def test_py_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii) + + def test_c_encode_basestring_ascii(self): + if not simplejson.encoder.c_encode_basestring_ascii: + return + self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii) + + def _test_encode_basestring_ascii(self, 
encode_basestring_ascii): + fname = encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = encode_basestring_ascii(input_string) + self.assertEquals(result, expect, + '%r != %r for %s(%r)' % (result, expect, fname, input_string)) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py new file mode 100644 index 00000000..002eea08 --- /dev/null +++ b/simplejson/tests/test_fail.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json + +# Fri Dec 30 18:57:26 2005 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '["Illegal backslash escape: \\\'"]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal 
backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except ValueError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 00000000..1a2b98a2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,15 @@ +import math +from unittest import TestCase + +import simplejson as json + +class TestFloat(TestCase): + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: + self.assertEquals(float(json.dumps(num)), num) + self.assertEquals(json.loads(json.dumps(num)), num) + + def test_ints(self): + for num in [1, 1L, 1<<32, 1<<64]: + self.assertEquals(json.dumps(num), str(num)) + self.assertEquals(int(json.dumps(num)), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 00000000..66e19b9e --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,41 @@ +from unittest import TestCase 
+ +import simplejson as json +import textwrap + +class TestIndent(TestCase): + def test_indent(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py new file mode 100644 index 00000000..c3d6302d --- /dev/null +++ b/simplejson/tests/test_pass1.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass1.json +JSON = r''' +[ + "JSON Test Pattern pass1", + {"object with 1 member":["array with 1 element"]}, + {}, + [], + -42, + true, + false, + null, + { + "integer": 1234567890, + "real": -9876.543210, + "e": 0.123456789e-12, + "E": 1.234567890E+34, + "": 23456789012E666, + "zero": 0, + "one": 1, + "space": " ", + "quote": "\"", + "backslash": "\\", + "controls": "\b\f\n\r\t", + "slash": "/ & \/", + "alpha": "abcdefghijklmnopqrstuvwyz", + "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", + "digit": "0123456789", + "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", + "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", + "true": true, + "false": false, + "null": null, + "array":[ ], + "object":{ }, + "address": "50 St. 
James Street", + "url": "http://www.JSON.org/", + "comment": "// /* */": " ", + " s p a c e d " :[1,2 , 3 + +, + +4 , 5 , 6 ,7 ], + "compact": [1,2,3,4,5,6,7], + "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", + "quotes": "" \u0022 %22 0x22 034 "", + "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" +: "A key can be any string" + }, + 0.5 ,98.6 +, +99.44 +, + +1066 + + +,"rosebud"] +''' + +class TestPass1(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) + try: + json.dumps(res, allow_nan=False) + except ValueError: + pass + else: + self.fail("23456789012E666 should be out of range") diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py new file mode 100644 index 00000000..de4ee00b --- /dev/null +++ b/simplejson/tests/test_pass2.py @@ -0,0 +1,14 @@ +from unittest import TestCase +import simplejson as json + +# from http://json.org/JSON_checker/test/pass2.json +JSON = r''' +[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] +''' + +class TestPass2(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py new file mode 100644 index 00000000..f591aba9 --- /dev/null +++ b/simplejson/tests/test_pass3.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass3.json +JSON = r''' +{ + "JSON Test Pattern pass3": { + "The outermost value": "must be an object or array.", + "In this test": "It is an object." 
+ } +} +''' + +class TestPass3(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py new file mode 100644 index 00000000..97422a66 --- /dev/null +++ b/simplejson/tests/test_recursion.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + +class JSONTestObject: + pass + + +class RecursiveJSONEncoder(json.JSONEncoder): + recurse = False + def default(self, o): + if o is JSONTestObject: + if self.recurse: + return [JSONTestObject] + else: + return 'JSONTestObject' + return json.JSONEncoder.default(o) + + +class TestRecursion(TestCase): + def test_listrecursion(self): + x = [] + x.append(x) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on list recursion") + x = [] + y = [x] + x.append(y) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on alternating list recursion") + y = [] + x = [y, y] + # ensure that the marker is cleared + json.dumps(x) + + def test_dictrecursion(self): + x = {} + x["test"] = x + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on dict recursion") + x = {} + y = {"a": x, "b": x} + # ensure that the marker is cleared + json.dumps(x) + + def test_defaultrecursion(self): + enc = RecursiveJSONEncoder() + self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"') + enc.recurse = True + try: + enc.encode(JSONTestObject) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on default recursion") diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py new file mode 100644 index 00000000..b08dec71 --- /dev/null +++ b/simplejson/tests/test_scanstring.py @@ -0,0 +1,111 @@ +import sys +import decimal +from unittest import TestCase + +import 
simplejson as json +import simplejson.decoder + +class TestScanString(TestCase): + def test_py_scanstring(self): + self._test_scanstring(simplejson.decoder.py_scanstring) + + def test_c_scanstring(self): + if not simplejson.decoder.c_scanstring: + return + self._test_scanstring(simplejson.decoder.c_scanstring) + + def _test_scanstring(self, scanstring): + self.assertEquals( + scanstring('"z\\ud834\\udd20x"', 1, None, True), + (u'z\U0001d120x', 16)) + + if sys.maxunicode == 65535: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEquals( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEquals( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEquals( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEquals( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEquals( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEquals( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEquals( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEquals( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra comma', 14)) + + self.assertEquals( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEquals( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEquals( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEquals( + scanstring('{"Numbers 
cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEquals( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEquals( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEquals( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEquals( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEquals( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEquals( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEquals( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, "xx\xff") diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 00000000..8fa0dac6 --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) 
+ self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 00000000..6f4384a5 --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,64 @@ +from unittest import TestCase + +import simplejson as json + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEquals(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEquals(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEquals(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEquals(j, '["\\u03b1\\u03a9"]') + + def test_encoding5(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u, ensure_ascii=False) + self.assertEquals(j, u'"%s"' % (u,)) + + def test_encoding6(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u], ensure_ascii=False) + self.assertEquals(j, u'["%s"]' % (u,)) + + def test_big_unicode_encode(self): + u = u'\U0001d120' + self.assertEquals(json.dumps(u), '"\\ud834\\udd20"') + self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') + + def test_big_unicode_decode(self): + u = u'z\U0001d120x' + self.assertEquals(json.loads('"' + u + '"'), u) + self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u) + + def test_unicode_decode(self): + for i in range(0, 0xd7ff): + u = unichr(i) + s = '"\\u%04x"' % (i,) + self.assertEquals(json.loads(s), u) + + def 
test_default_encoding(self): + self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), + {'a': u'\xe9'}) + + def test_unicode_preservation(self): + self.assertEquals(type(json.loads(u'""')), unicode) + self.assertEquals(type(json.loads(u'"a"')), unicode) + self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) \ No newline at end of file diff --git a/simplejson/tool.py b/simplejson/tool.py new file mode 100644 index 00000000..90443317 --- /dev/null +++ b/simplejson/tool.py @@ -0,0 +1,37 @@ +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) + +""" +import sys +import simplejson + +def main(): + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'rb') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'rb') + outfile = open(sys.argv[2], 'wb') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + try: + obj = simplejson.load(infile) + except ValueError, e: + raise SystemExit(e) + simplejson.dump(obj, outfile, sort_keys=True, indent=4) + outfile.write('\n') + + +if __name__ == '__main__': + main() diff --git a/static/ajax-loader.gif b/static/ajax-loader.gif new file mode 100644 index 0000000000000000000000000000000000000000..f16ebf7cbd4f28620c0daba2f4a36ae0196b3d4c GIT binary patch literal 10819 zcmb`NXHZjX->;L91QJksO9Fy4X^NnNiVC_BuprxlbVKhX^w84?z4u7CGf5evn_!Tr3?d(NECJ2Pu0AJ)uTvu54bx_-a^t*&`j>8i;TfD`Z)060EAmb_Eg z)wf#RL@#+W)ka%x?pW*}*_fIC+&j2FF}pLjxVO0SWp(}A+xH6{y(=4A-?w-72Szu) zd^`O1{b+b%YyaTK$DxhUsqNjpgDLv%hfiy@)VYJh9~E^o`Bf9b$IM!4PoLaT)mD=~ zFUJ4`006?j5qF#|Ok6F@g*p)Y6Z7*nj+PjJ@F5rmKRHY0xC_AA8$W@FCrO7u?zADA-VZ`x?tC|{EeZ$+PS}L}+>0Lu3YulS{ zT4!^6L+^W9RZs87mhra9j_Iz!%2vK2CI%TW6b5HzImN~dVJ!O2!OQxY?dQ)g-}ZOc zHbx@}1$YJ+?Kz}{t&;P~OkgsY^BS?Vr*pM7Zz+VGy 
zGfn8J#m3_yoX+b#a zDpjI8oCMB@qx33`Xw_VU0cm=BragGsS+$bevLq%hnO&KCcBH=3mDfvuPp-Rdj{6&R zHa8Kz$xJs|?B}2IQQUsp?)jr!!0=#iu`;qPU^}gDYqibg#q;S@SNLF|EZL!#x_2db)8GULs#w(5+~toZv=1YdVas3W&uFU4AzBgx@vEBz#;l z-pu9tZ)?LyrFGoG)7w(&W-(&|Hk`4;RUNspd=I*5q2ess%;1CmgIS<4v~VcVWaPQH zANrnVeu}B%aK0|an0w{9^-)|O9DS)D9?uwbcai9O^ScC@pw(ezOGyY?wF-n!Sm#Ke zZy3)y(6~018z!}0`0yn?_&rY+<>YT(f~^#{5m)wlacRx^dP!x6)JAFbi0ww11o>n& z{9EsPXXod0%vV-{ohI1IX%fgQW4U^o;-lR1D2o$i=iuqZ>VxzPB9Jhr87P6lwqYLL zzAmxh0lsOW7;-|8F98?s9psNE+IvPo(qi06Ws!-wc9~e{KMO$+^=Bn?AdpZ9E6B?F z1A=K_u-m%NuW@h!26eF-W1SeBa_##tKQP?qva-5}9POT49d`BGnqS=b_~meVq;S`! zgb_=kIvvv?GI&6n9GF3ujY-D@NP@D-pFf91Cy^u1oI&?!IJb}(xK)nDnXbl&f}s>; z;#;8si@QnEHk&J3gSNcYVjbRBWRIc( zvKzVb5N~!MFj(-4jL_9=*-X>+JQKFctTLloj)kA> z9G&XPDujY_f!g_$c1eN~62X^~Q5s<^MdWux&z|6ZtNq_x%Nv)pIs% z<@aqc7|CWG#B)lO27spFDxV{ zdYr!DyZ`O|uSUbIfsI>M$}+#ezuJX`W_;eY)V^^4y_63+2J*_W0U% zepJ^$z`V=Il-pRMH-O)ERX<>)`;978*j-52{u7^zX^6D`^EA)}O5Ge(0N%+LbVWZ3 z7{O(%Xc&Ih8OP#|MC*!1{kR+_8YV)k6N#2`rwV(Ym*O{JWIvY#0x~KwnK+*jILrpH zgsEGa3u@KiQ+VRWO+tOg#nRJmpN$nKR4-XirTUoGoH4@A*S|=2kmW7Na^0?<&XIR7 zOh~8kdrdpif%1ohd7+!_eVX}T-u z5(Z#qmf?p!n&=XXnBierie$(vu4-qu#{&x`ScFc29Zax*vO;;4wG^3Ly>pi2dQBUe zbn0P&ztrG*zPEG!kCk45uPjJ2c}{EBa&mFKYwyEbh)Y9wN5z&wFSW8F5PKN_dY3b9 z;`7C$R?X?Lu(C_}IaUiLI}_}`M=#WHpaA=gENGdp>e$%g{UTdV>#4Ev@4p1+=Eh|Y zro1mdCCr%)mL1?cSM=v5Lq9Hsj%wJ?9j;^ro=bC<80f*9MQpkSM;v zL4@!y?`T^T#xa7`-zO=YObN=sCp&rsK!P*uQ*+WYZ3q>q-g%jJaSkZK)98~=ryUBy z2k%0_Pmlq!a)q!m4GqDWdi$e#2Zx7ykyeufGs7^bb-%U6(h%L%cXMWRWg-Z<_=RaD z#O82mXO+I86OmbWX%C*m%p0DiNZOz0kJ1LV0C>PqrE_SsDg%>{>IpYgh~2Rqg2z5A zl6$I}Fc>=JX*Y)`iIH17<*mx=<%c2iX8DHQdzM=1%E1)FU8;FN(#6;jGWN^Mz6`Zg z&<)Aqf{`qahF9kBr`Ecq!X-Ie#WAW3V6YsIbQW-t+~{$LdQAb4@DSW*OFC&20rFy& z2lu6B?cSk=GAiD6KiHjZ_Y_mQlBneTpfipWpo2P4u;}B|Sh(zvl(O1bEWwtawu#vs zDoxa*pm3GTgXO|jcs36RP#Groh?);nb#dXA(O`Z1h~(A`%ekL<(?k`t@zKbxR*vAa ze{vyYUvs+mw~hW<#_t);DWKTB-sc_>zRKTrezuR!80~y^%k=lqqYE7S#8-7r8!^k* zu@c*hu!cZ3ZM2|$xRF&72sAaU>RIVf`2g2N9%~6ACNs@m=ChA#32CY?oRJsOQZWma 
zc%1tn;!=!N3wm^FY!rGnMkz1+0{;Ffh|Fz;WC!gr9iD1 zMbiLDHYx$kZmpCK5pN38^Ko=!}~{zBHD+P|(rL2v)lL1JiZcvF1V8IYn36ul$2* zPXmQA!6NM!`ns)A&3bQWMdGq~84qeH)gynuzVDvLTL$P*=d`W^bhACum14kqbeDh~ zp_|ifzbq4R#1?r$S+!ur`v(`>1hn)7e!bfAk@|=bIq{*e2U%I=tP_;ClNTT5apGYplxKjaubZEPcYqHo($AKNbHXsNM!Sbn zpuPlK5683&f*%7Glan49m06IMkdz#S^YTh8!&k>)Q5LLLGDaR42)y$IEGJ~K4n6_P zAQODB*FVbU)6jIUwO{nq^e7bh(SKoaEM{_GXoP8VZhQJOYKy0duT_T$K{jZrJ6 zfw0{Fa!$S6Q1uq;)Ke8XJcRIsl`4m)RxJ=gjG{p(&pB06gdp>A&;ucY1u;hA@7ub& z=)8wX@>Vws`(G7+)8uU?3}IYiq!ez$5SjI=<^Du*7r}U{i|A0&xq$EHhYH0#xytqD zzomRr?TY89xoC~YI5bFd*^bjJG7If(LTG|fBTfvJ%7Y=zzH|H1k()r@ou!h> z^c2FBs_5;*M>-Ul@f1okzm#+hBjTc!zu%o>fp8XrgZ zb5XdTUO|K~2l0yP-o+uH3!TxU&-#3XzQP71v?a&Cg6fBP7-@|Vyo7}s6H2Sq;U2fEjE zkDvm0ep{%LSKY)_pu9H|@MIe5iAgYjPT~W35#u0~vye{i^DsWa#1<&+X<|=^;C5lQ zvC7R3vpc9)xY6LLX${jrkI!8K%G_r?vWb#kb?{IwdLZh)@Jt)k!<`T;XvQ36!|x(V zi?#Ip6kOet_Q75*--oAbZC)NW6`Yh@Sd?4ao}73o;S*{qpj zdE1rZ@Zr1()!az|g|P~%@v5CXO^{^wIxodG^0?|%->BW?r@++ABGReRtzteY-tnA{ zqd+k@7@MV1m^)G@97TuKCtcQDW)hApI2*O&|3fM@EF2k?v=R7oyFjHyjE2ZICx8BC z?*5$vD%_%g2AuT}riLuc< z#TDg;X0vJ@E_Km@`d`2|0+puvSj8QxKo3Mhz5?&)bGr2ku-sVsc$fL%W%-zkrwSl< zHeNm$8jpY2H@p-tP+oyKvQkx(#ybswbIwySsZ2k~MU_I6Ni5HdOqI@bAp~`ma%jS* ztH6w0g7yo-tx|--@k4~tnBoOvqrN16SW zgW4D&i&oMCH@wLn4sL>_qU_#o5H|HKzTHwb0SPzB<=lHsSRr_%;GihDqkEyKoD` zNyxD^)TZOfkwRjILv>ZLWU$1oV|{xSc@Yvw_(_QQ_@3CkI0;TezVVX)6oq#;Ew$m0 z=wiI2DSrUtSfM4bC4hdtRR%JpNSTapCqPq(L~#UJK=Y{Zsyo(kuYdH+TQb>O(< zlqymlin{i7`BlJ$PUA-{ov{8{`3uIqUq8R&tlRl^UihlR5|EJ3kg^z+A^@#CYa! 
zQj!Ntzarvlw}!R_4x<+Q!1HOj#=QEugnA$Yn--geg{2oX17fHD`T@uwL=hO5vemwv zJg>L%GEK5UEeB^VZYHSm&{;AO{9#w|p_Nwa8%ufn0T?e{l0KsD{m8u^$PoMRFuGvg z6TWU7NL!IC%8XH$4Ek7h#N%{^M-Nyur|B`4J52DfD&f4QR9u=b&sS>JR<6HkIi!Kz zi2hR4$qW71Y*l)k+c|f%QPjA1?@azVYkdItQJ>N@S!yDA0?M~)vk=tW7X*wJ+SWaP zFHTE(KDs-)c(nXhvs!U|P<_IALv`qAO7q3$wW8oMO_*o|du}T5T|YvpA{SWFxZTmY z?Jdd7pn=ANeD{7x5FPYjGnwIU(x!T_g81S%{qLIA8R*Bsg3v1$(!^3Qt&p!?N{gm}IRq=E{aKwWGg%&?lZQ5u8=5M0ZLcR} zml9LnJ$$VI$(s4)xf(Xi(pkxBJ^acK&NECv1`US2+@K z!jbFm1H!$Ugs;zF`-Gab#;#&WcaE2pH}SUSP_j411qlldfsuT?ph3a0;kYPrG^<-! zLVyoLN@{$P&*?08+pvf{EB~-!R7O$^-aUfi?Gj?Ij{6TvTu7|te^AoX!-VXP3x?2` zMn?OyLt)*cGvh_E(=#Leb45doOP@k*yW#8ojc!A`i&NQ!$hFVV)IruEVXA~|s)~#K zW|#=0))OED2eeX%PM9Du;|2fYD2#s`MN-vFJYgeoQSM1` z-njF^horN>V0+ImOX?}2_jzu>o{HU1d|g z@gsXTU!(Y5E4HPv-`7phyJOMpBzEqK!^h9ymf{MNLhbC$zx>ZKhm!3~hyhk0k1s}S zdg)~NTVGDrh^g{`rI4mx_sJi!U+o&iL^CVXD&MSOyl-77Z<%x&8f7-PerpP2JlH7v zbpJ+_vn!-^>vm?^^d=kx71ajXz_P@Xyh3Ef0mmd ze3o=dp2bKK#@)s`Qi16&r8%&GD^>`!)PkD}i6r12dWu}LI_n@2;uKNu^1b9`B*AwFL-4nvGGQJ0!6bA#Xq zK^qYUHL0p@cdyhD^vOm_6_EE6p)J0v2^*=DWBG(d11~rQlYT@~FKq~z>q?1KiGkWT z_EXg5!b&dpAX{3*HL7Z5ScC`w#)IyPI1W@)RuEZybk(ZiqV{qoCmM@y4=Z|*?jw6& zXAEEdkgeK=haXram+vgt!jDlU<%oXf= zjd?VxL3|vAs)Qk0@Or6x9Y<{?h(R0+fNcFrwGQF#ku92mTo50adK*xj5Z3TZKq)EY zK2^yuC{jv>?nzt0%}0NftyG?2zoUE5>$rA9$|puKgZgf<2YLVov3$A0VX#>KWBVVD zJazs1{h##nha(mLO+T4zpnt7S2>`XUNnYY0<9(Xq9~MKjgeF7<$9SOvtb;r}VNMCor~N%bb0QN13qnG$C3!*FF;$er z0!O5}1rS1^rm}!Y!W|U4;qUYmtTUqT6 zaah`#bPO5(0u3$PU+F*kzTB*uDjGgpi`^Le=7KK317RoatDME=Z9y39{{aeDGM(5F zAqYGU7qxK~xpx8-v|;e)#Sck*oTWWubg>?@YgQ?nhkGIq(`9`-SnfS;)y_O~Az~5s zY*|a0H7LhiTWEz=pva8sPY%St2SQ~`bwmG@C!F)Nbo@#xs201T)ulbUZP4W8meIzX zR8F>obJYG~=6hrcL2z)6d{AtWhcJmMjBF?wEcQAd3f>LV16tP;71XtSyej&J$4(Mf zc=l}w%dQ+f{( zPSwy0?^F8H6Ee5eE=*xxR~n_f6y_O+UKH%k4Md=|(QroNEIJV>*PmzDZDO4X)q89` z7T()cAci)&+rg5=q9S4FY$qS9l6uq*kx;lP4oHNwe@!5E)^H{d*TF05y$AXVu6<2 zf}g~IJ}?`oRcly2j+Zd2U^f=AFBT}BD6VQeo4OfXmcl4qO!Eq)9{ap6vQ0MX%&=-H z4f!Rp4r6o=XfXl-))vh!6eB01jKwV%8%NbB!eO-o8phH5EREBYE8D5L4aYTO>(rWZ 
zyUf6=6SR`3m(!zip_})DDzR;F@wq1O26Ed>D#ET6z`=x7f#8-5R-2PALXGls)fxf$ ztrlJY3WK=5OCCq5yO^KVi-&0Ge06oP}- zG~e`&j`7ueaPjq;HdRnRVF}+eEERY`{rQXbF2 zssTI7ci6=Nnu?bOc=Sj9@ z9s{y1Dpm0x((Ve_=f-!=Lc4}7CVV-ZzYCGeEe?cDdad!oRwY%8yc7Xfz$9tEJFo@H zpQV9NV3)$?;CPhy&_w^<;T+26GF>xUZjdG#4(56S4Biu?;zRB9RM;_aTcIVgtr34V z(V)-brMXy40u{8TCubSlYfl9vCQ6bPOs(Kn5^8(#tK#t=%k;u6Bb*8>vAEl7EDXai zb@mAVl!N>fQpD8)pTd`cR=Eh7U=YyFa6HLgmXgBb32c2vD4a4{IaT=a?@p?hqa-~; zU$$8)JxX8<&RSBvoRVk26(F@)l3yiJ0&b<;+l=hgk1DOmXlLYbYinJ9etfyN{+wai z2jA!6`4X2TxuEnPA}Oa78s3HJU(R(V$|Lh?CMGC zsG4<=l)!dze4Vz%ve;wm=p1anG36pPyq+Jz88yliqFE7XkTE6{`ucoKi3KitJigp= z=0@teVqPw2CIj%U`h+KSEC11wKRo&0>g0d5(EHD zUnYI16H34#y~)1*1R^UX%+Ji-BNRr7^&>=`Bp!yKM4S&984(3d4uDzN;qCH$66}Is zdd3DsRE0QsK^V;c!3l_i^>hm61Sm|1-d-d$yBgVj5_6bZQap#pMkcV;(=#n$7}&zt z(p=XGPCj`Jtc`9Au5Rxv?PY@w7eCz&$YG3}y-H*X;s3)Epb4CduP#fSs0NEgsEl5fJ6E7xf#wA@dUw=)&B*)yj_$#CwHVJ#y4QFl!8 z!IB(ny|Gtq^dG?8#5+Pb3Z28Go=Ma6U`$F}St`pA7CyGT@N@svB?=E-<lhO5|E@f1c-g)%4-6as`B$nM3B0$`o#9 zb+F`&Vnn)Q({ktR-3z}2cE4=5a%=Sa8F?)?z%P}0uuS{@+>6Mqh(LKZyVZ@gkMe@P zTvcqr#eDuV6Xvk41Va0!0#!i~O$h+I50y2lIInn~7v@OBx#xfV9KLJ?N^8= zO3Eb##*8pg@t!>BYJUImx^FMgr!0BQl9wOiqZP~tmeZUtt^EAPR+MB*a$2&@l#647 zm6etMdi^-w`K(8phTIhrK5D30>|ExHJ)F*LH{s)GCcloUGk?r`F~>0e}+8&W_xm?DZA%b zl_IKzw|2R%>>e!llzR&Xzx9i_a&XLmgCRZ1C|`d+0x5tL=w?qKhr=T6@x-`De=}%k z7#8N57?K14nQKXb#5 zPjnMOB?9Ci%zQ$BCdKWc9DVvbH8c5qNg%b1FcF z2of7wB}+xM|DUiU^q6qD7DVq$k5Uz#wB_SkoJNlB=r`Ika$cG0DML4TZ%}kAxBAwhl`K(e;;AnS&v+`X@ zdgZn62e02{+TFOabTrw%GV(`LfDG&^m4suntv6y+7hemV3=$h{lt+Q9Reh3pSoM+$ z<=5>HRuCi>3cp}B&0?`*)wj&H6uVuA_73;un>XgqI~bK0hX&;s^z8DPG75CXFZjgy zYUnbu6Yrwo(6iR;!8sSRP)<*XBUu0)4oi0L-$jEjBQtIE9)i4r6;&Da$_sm-X3#P5 zB!noH>N@nMJ`9R=ELe^kD|(H`;nsuk^6p2HiE1ehl+`TU`9zD$K^>CMlM%|nL;^EY zLb46)sfwtNfZT4*<+Fzxc}0EE`nIluet|QEpQo32aJl7hyYSi$HEGv4UkPAzeKXC@ zfQQ{6ue>CHr})+TS2P_`>$wf-10^G#G<$^B*#~F5=j1-v#5l53iNE7nvvm)zSL3YcQq`!7K5@OdZe^d>PEMBXgo8Sj2hPG z6GL8=rq~emG*JoanRhRBKVUEv8@f^wU_LL_7_0j literal 0 HcmV?d00001 diff --git a/static/favicon.ico b/static/favicon.ico 
new file mode 100644 index 0000000000000000000000000000000000000000..ad4ca66a17637746a5c33e5a1cfc46e35754fac8 GIT binary patch literal 21792 zcmeHv2UwKH^Z&l}`q2ag8^(eim7pLN>;*+)iBgRs8VhO=>2e36#%@FrrA3LTNz^C? zMVis5!2-qxh=LspNE9QSl>5!zduQcPN%`ja|Jlb6yKmXq*?DJoXJ_X`h(HX&i9lw^ zR0pBYq1?SYcl~91gciU(Gc)eGgC0V8RtPn1%3U88z{|rC($!VJPa|~L5up{14#Oov zQz1N+j2tm!9Da(yXCZU--ZcXu>CKTt22SuNN@KlNyG}GYm1E;K!?)^yTPKTIXI70F z^-UgWI-;%qFFH>@xo^Dm*I#wEMnyGESP^15Y*rhCJl^uU$&5n}va?G{O1zYr(Iwf}wglz$%P9+ZT$XV*-Ln6V zJ59|HF@_vyId}UqME4|UyB2nKQHa!a=)Y|+A{;4Q-S%=69x?X5t{u9B=x~7k&z{q>evb`!UXIQN-_%iw8 z9Odkvo%bbmujyp)N5?-motgMJGPpe8uIrlgq?i(GbmX$4m$%Cr;^1(DIYSWfqh93> zqtux>eqIK)hc2~UoL(I5u>0_$!gZ52Jesv|?!6IL61Et08`Cmr$4uM1?6CAhi7{To z$0ZabPS1(!*NZoJ3*SuM)6*lvqOj_Rn1UxufAK3lYMI?4bN-=)MFE)$bDv!ZanBkn zJUXU2Cxt4ru6S6G-EXE>TF)Yf{xPl>oJ+i(4+^%v=q>%iIwr{ZbfToyk=emDEsE~X zO7tAv*G*O%y23!`n%|s7cMK*iz4ap7>4;Z)`N4o6;+1J*g7Z#1$n^+(eEE|9ow6Q< z%CaU?nlIY>lLsjk^!(9sx*cja`qJ66`?CyczbL%up8rK^aiQXQO8A5P6@wM@?IAt} z%Lo5n=v+W=vrd^8?;GHxo9ulm&3JtUJGVks<50Y>N^YN9`t^gv;U2fmb55R2PubJ= z`mAQR){N7$Lpw&D%F6bi)U5Nds$P2!&dJKx3o86-zRtES^Ib-YTy7Sm>`VQ0k#!1H zn&q(gme;MTeTuX5OKO(gNRK`caVnzQm}1{9kRY)=GYq(zV-~> z;YNFw#LSMLar>8<<~v(mO}bDyywm3EpLNVGxV$I!(6r#2p(n2$ij2xGSn2LkR$~(# zn|Wuu?%uR(%bOhvU+~L!vDLOyPu2QP`|5b<&{CecYt8+O`=2~&clFjDePFxYs9`^J zUgKZ6WA}@qj9k;QiXN3uO3&yW4PVGpese26wVQb>UY_HcdC#6YUi0_rlu`MOWAKct zzKYVL1K6^awksd5D+xYo$r~SdxqWPWf=x5!s1b8DQ(+s#U-h@Nlp{eZNf zy&-2ty~s{G-uk@NgX0fad#4d*C+?V@J3|;}6h4~ld?o8}+O4bMvF4&kS4X4J8pH9~ zEfiiJ&mS4*pNiZZerxO87Hj)wpW=c|lnx{TGfAL{*xYoo^Nz*?sO( zof*4#7g^oQn6=N!Gh&LR;5(O&UvctqaMt<$D&U};%DXk?A%%>v6eDEfZ4HEwNv`8}d1=5T>S`Pdn=PIm!R_ z$1VFqPWY7Uxos({nU(4GE`^mYqr6Xn&^j z!BU~`9wxe*j-qqs!{p?gH9?uB3y1z0CrU5cVnLiut@X{H_T&qX=X*W6wV3U2E_0m0 z{jJZ&Rm5dW!tFA`d(S@?G;8i=okQ0hCu9UJ>d=2yMf2VVhWr?HtTr!i&H$;mU(5KS z&i8jXCAI(?tNZjDo~|*A{VEyT;L5xQFCr(+Nd4hUpI@uiD3%XQW-@~|Tg)qXc-*Mq zLhln(%Z!JQ3oOn!d2Le3h0O8ugnir!(lU~1-J@5ZTAa9GA-=gXr|PQ3*xW72D5l`c z0g~jD88cCdbRCTMWQ$iF8XsI9QnhkH)o1H>=DmntJ6+*E9Dhj^v~g}(e`Vav 
zIltY0zOwW{LB#V%R>iqq<&O_snXsx=d49SgbGqO2(FHld!&9Hktcfa5$STZlR@^e+ zyB>bI_8n_m7u7}=Ou5G1zS~xgMn!FQKKJ=PLfoc}$L)m63$NNZOue0G;h|GHDR$dm+;%%GkOve&$!h`kTr16TKXlBO~Yjt>?@3Y@1be&q5IsyzkE! zPg{8{{N_;8#a>qIqIn*QbkEcCV9M(~C`~6cDM}uD$y1K#31CxSK5NjMUX93a#b#!s z5y3Z)Ki^u8b`gBOfgL)QW$wc_L$eWijMkMecq(W?^5v#-d3S;~iwPdI9ICO6)fz?e zAJmc?%J9^Atxc-V1jC*ejU4JQWbdFa!a6`1HrMD20N!zHOG>2@HKI`ljXKb%1OE#h zP$(4Wui%A3K>{cZfEK~w1*K4Y6i9d}zqTe3Q9czOlVrmE&@Kh-fR9uWK%oFA>UQfxr(&Fp|qd zNx3|X=J=Bnb@8cfUHgFhBz~tQLZJf@Iz9lQj6Mkc0=GILq(2NHm*WUIWpKGOlqVn4 zj!8!mnwWx+!)}CZ*MZ;K73i1(a3TPl>sxjC<{y@2QA5CESrXTqpwnbexM53Wm|-(z zph0V;xq!7{`9s|GB|DTllJAuIlATIjNg@E}^%{K!wUiX zO_hM%P^}#sl>#6@RX(|Za{>{062pt3H&YomZaf-6kn3y zD=|_olo-62CvK`RqrhuV+B%8ZNF|@$SSbX)SSBEcriKO3)aWHNHA+TP_MrgzD>$xX zDAzb1voM*@tiC5;p&#t}XHd5mml@hA8dQi6B7<{}zb9$k zhjaWgU&^Ds(gkcZ^kE&y0rZG+*~;~hd*|QdAIl#nN~6Efm->>r4}@tq;QdblyRk~h zZmr=nVRs1Bv?Is2HeIZb?yF}XCx7P{t|q0K$MM{S`=AEa0}WL7;1SnXmiG7o>p#q8 z&+#{)i6M(PS}Z=bY;gLE{A0egCG|7aIH5?Q<8U?qFc&A(GSswPoSwb>mfGmld;cHg zU)?U|A18q>pIP?|<{?-=wkUZ_@MVMy+i}lc8}l%rpaKJ+KLjvYu=ssE|9`;z^S1uf zCx4E$mw!o`r52u2t#7J6tg9f%_Jg^{G^`1KEe++1qyItvp`QfFQcls_T)Gx?C?3{= zo|8DPS)oP`j7ZwDFDqQpn-v7wutK>tD_jf+(SpVBj;uE;40IFep7-I_ud2D+`c>Eb z-%$Chz7wp|VFES|#_;MgA-l7dBKyu%^Dn|WfXm%k!Q3z%W91^Lp)yEnr1S&$zXt*E ze38TuYzLM7Q1kcm&y4|^8tN2&gA=0}tU8I+*4e&e< zaCx4%$1B_7&OZ)tW=YFsy zo@2X|B)d;g^Z#-U#(G2(gb*NjA1HW-0V9zl2-co2Nz}-{9{aNa@_)I0>G9dMU;}M} zy}+g#0lVr6Nw(?7@vpJtK3@Ksd3VGs`@cT>`6Kw(*u*OPfh5U}BRJWE9nFiag+0y* z2-Y&@I9Bx$S0L4(Ab&5Qu4=8uG+_Vo-G|GcR?XFTO?qXoY3#Pwb4;1D^%#1(%6=2D zYs!VZ1hAneZNg7MUH;PA3;l$D2z{XZKgNCrx_F;}h1$Xr@1X^-9|jv5_F)^IkVH2J z{7l0?2!g`8{4v1ege3?Gi~}C%|IYWz-!+btH;ebMU^-#hV*61ATn6C%Er!O%=IgNZ z{7h_na^p5m$4eLHoV>hh~w1fl;4 z-fQtyd#?zEkllo3|0hXw8o||h#XrbVSD&Q18hjkOG$E;OP_MHCU%E%bzxMeb%OCFz z1rTG2=WHxPo>&dI7U&R!PBW4)?M{=nU-D?5OqDN%-3WW%h)N+ZPDxV^Uvs>_w3$^M zZb!4@W{3^Wd43$u;yTmC2B#N@o0Q@8*GFPd{VxAlcALR}5|YPI6C?0Go}ng3!QOl; z_DQG#W%unY?Ue%i!yMpOHOC8h>tW6WpAg>n(r()k;zQpx`*zjXQ}N%G5qJbijcWp= 
z##I2ESAQr1;awl8;fwhaqZbP#1{Jf!O`m}O!4mvelf>Y&;DqByXj)&Z!g;Lf) zENRk0xwJ`3Wq6Zj%Ebm|%8o*|5B47#sQiZsNP_8 zaNdtXzV&35<*EFXEGwX){En{^dSe3)*T{wfoB@#?5ZVNYXp7JWpw|L|eg`5n_7LD8 zLXMDQsIIl-4#^1F??q@JjPovwVSTa(I-oDAgk`Q?H@G=leP5M@fd4#57Sdn@L)}-9 zAxuvuUnY{v)-`(z&EZ-L)|$#?EA+6>Tqg4Y_yIz&|C|Eqnzp z8s*Zc1An6qyuOw-mK!DTu@V5g3z-4z0EDUpg*sB>I#;K?^RLfkYJ2H@@hzH|8XOM; z@2cz|ZSgeq#?1Un-yPeE*lv8UJnM=fdHdd~&`{IJuq8cLhlV!C&qC!J+lI=wb`6zp z>GCF`I?3hh;hg}mxBbE1*4Wx$!>i+J-sY)}q4CZw2v(S5hfuaLLTW#;T|C61WC4DI z7?jsA=>$TflMu364SRXDPmqhN&=7sQ{kmc}-o7_PthXwpNps~$ss2X%Oeze!q$}C3 z+TY=rl2_OT_YMH|Dy5<&xdOiBSO`mbV~_WC`x?IL%lojxCEjojF@Pt6enHP@A2|Pt zsAj0oH&SGu>95dl7ERiR@tD91LSDR*&nzz{2!qzI`-K|PzAYHMK z!S|{pi1x6rd^eHAC$>EBQQ*Bj_!_=_{f)n~eUj)gk@JtK{AX&v8Fvm&5?viIkKl8t zB#_uh!^7X(zQt!6x^L#3Pl(4X&IMm1oNx4=qIs^ZeF8e74R8*)tS_p<7q~q2DB&clOi<@JYrtqR*8M9WWc`+R17WqtD3Jkyr9M-?E zy5tY*&!D>M5JYPmSOd>ve=_*N(_Vk)ZTZ8Yi*O)MyrDig3w3|{N@Dl|*56f_7LQ&8 z=cf_X5K9?Bk=9dR0k6MfmhzZ@^W6Ho`VW$5hJSW~zo{y{-M%i%Gg@ubN?9M4<9Jr^ zOWRBJw{Z4H=tW)ecBRhSP{p(ESKA-UuM7CLE-XvD&;Ag1LnyI6SRk~k2SPtc5mK*X zgA*VQIQv~-n`{jDj(e?zDQ$?l%PM_Pm*3GpxlBeZlgo8J1WROcK4*VMnumtQ^aS(> z4UK*U`Bn?L{F_!>ytJRn-z$^JU)r9y?%VSJhps=^{>J+Wb<95AW7G?H7yEV9`;RKv zf7FHgWA^`{{{CBX{ePw%&3O3#HOGDu_|I%qzsYO#le~YQu5H}br#}=Tc$N%*K|fZq znol4hN%V4p*uyO}^%=z1k6+7B<5n}&XR9C<-=BcERzh_ijwxzlZM0HXt!E4(bex2e z%sJwwB{~Gsr>JS$MEIPRje|4t1f`I-9f0%i;kE{AT+Dx(WL*QSRqNc>bS+f!m&0slvGhrKsM`N2}6T~}Wy9Xy6L!dnYeepzr zBz1<^_yO<_^xdN|eXxz-=QR}6#jyuCW{D5+2YA0^K2*ql4}EZo;;6pt1GdRD`$HTX zgv%rg^2huG{fC@<++#TUX1%!jn(xTjg9MToK}^669PoU2ejd%jrznTN>e zmM-Wg3Gx@I^nHUEH?}nGyc}Pk z{~GaFpG zV|(!U>M~l$?uPN@7LTtrI>nXW{rj5!KjP$r=Lfu>G41m9+XT_3ACHk2aQ+@kC+<1T z7ZIe%1c6TKIDt;`c!5p|;IJAg69l?P=8E-8K|gPD`subs1pL#KTO3U~EK`AUTmio^ z_@2)!DT3b{z#QtL=ADam)FY_&GSE%X&zi-Mt5F*QWr*Xc4UjfbdWZ}$eHbQKKcm4X z76oxk_%~Jf_dIbBXS1zVz-)n7zR8<4bCIThcz@J}V#a(zG4_DbZ-O1&o@U0GQG9#w zVUPsbb3CjmYjJ!yO^pdx{l1H|52eU{vk8RfRMi*;9l_HXm$a>GjrTd5KqIIBN{$C5 
z4!$x2{Fm)EU=Jsio`D_2#~0?b(9J=>+fERztOQKRRhVb4@tLK!0JkBQGKC5`Ok_0$YZ0DrX=;c+p9#$sFkcrzY#RP;r6y_W z3-ApKMzX+F$fiR5;R0sKEugH9+f=R31fTi(HbaeFs;P|UIVj_GUW)JaWF)>*gzQlq zHz{DSPgBPfOaY)Fg`*L$XQ*Z%T{FxFLcIU0BXcNm6a+BNZY z{aMkf86y2Vu%`a*Ce%3xcHW=PLiP|%cD4szp}r8S0wk^>s^F~7hHtw5sK_I3SvMJ(*k$AS%&NzlenDI=4?{qZs&8V+N&-0fbq4vZC;=aZ>OTH$Am)Oh++xAE za{`y4VF->UBqSmrNSADa?}7aNUVXo1_ra_I`;F5TEIWu-1iPV^3x2mH#SHcZpRSk0 zupEBR3%S7`{!&w=yO5Vk6CFn3I9?tde2GVgv$f97};1{7vw=p2(ndgMi5}h z2o_i|LSL}s=UXwNfc6yp- Date: Mon, 22 Nov 2010 07:06:29 +0000 Subject: [PATCH 77/94] cvreating a special tag so we can quickly restore to previous version From dd483db783e30574dde166c88cf21d7705b6c65b Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Mon, 22 Nov 2010 13:24:50 -0600 Subject: [PATCH 78/94] Move books directory down into sub dir next to CLI. --- {books => fanficdownloader/books}/place holder.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {books => fanficdownloader/books}/place holder.txt (100%) diff --git a/books/place holder.txt b/fanficdownloader/books/place holder.txt similarity index 100% rename from books/place holder.txt rename to fanficdownloader/books/place holder.txt From ecdeef4b3b81a2965ef5032191321120a8c641a0 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Mon, 22 Nov 2010 15:00:00 -0600 Subject: [PATCH 79/94] Add new adapters to main.py, update verbiage on index, replace a couple prints with logging.debug. --- fanficdownloader/fictionalley.py | 2 +- fanficdownloader/ficwad.py | 2 +- index.html | 103 ++++++++++++++++++------------- main.py | 10 ++- 4 files changed, 69 insertions(+), 48 deletions(-) diff --git a/fanficdownloader/fictionalley.py b/fanficdownloader/fictionalley.py index 332a08af..68cd36e4 100644 --- a/fanficdownloader/fictionalley.py +++ b/fanficdownloader/fictionalley.py @@ -249,7 +249,7 @@ class FictionAlley(FanfictionSiteAdapter): logging.debug('li chapterlink not found! 
li=%s' % li) - print('Story "%s" by %s' % (self.storyName, self.authorName)) + logging.debug('Story "%s" by %s' % (self.storyName, self.authorName)) return result diff --git a/fanficdownloader/ficwad.py b/fanficdownloader/ficwad.py index 13e7a45d..058528bc 100644 --- a/fanficdownloader/ficwad.py +++ b/fanficdownloader/ficwad.py @@ -170,7 +170,7 @@ class FicWad(FanfictionSiteAdapter): logging.debug('self.numWords=%s' % self.numWords) - print('Story "%s" by %s' % (self.storyName, self.authorName)) + logging.debug('Story "%s" by %s' % (self.storyName, self.authorName)) result = [] ii = 1 diff --git a/index.html b/index.html index 4987804d..f5736129 100644 --- a/index.html +++ b/index.html @@ -2,7 +2,7 @@ - Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com to epub and HTML to Stanza, Kindle, Nook, Sony Reader + Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org to epub and HTML to Stanza, Kindle, Nook, Sony Reader @@ -33,23 +33,11 @@
    - Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites - much easier. - +

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier.

    +

    To support new features, such as including story summaries, + the URL you need to use for some sites has changed. See below for example URLs for each site.

    +

    Or see your personal list of previously downloaded fanfics.

    {{ error_message }} @@ -66,23 +54,24 @@
    -

    Login and Password

    -
    - - If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide - your credentials to download it, otherwise just leave it empty -
    -
    -
    Login
    -
    -
    - -
    -
    Password
    -
    -
    +

    Login and Password

    +
    + + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide + your credentials to download it, otherwise just leave it empty +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    @@ -100,7 +89,41 @@ {% endif %}
    - Few things to know, which will make your life substantially easier: +
    +
    fictionalley.org +
    Use the URL of the story's chapter list, such as +
    http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for + fictionalley.org one-shots, such as +
    http://www.fictionalley.org/authors/drt/JOTP01a.html. +
    fanfiction.net +
    Use the URL of any story chapter, with or without story title such as +
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
    http://www.fanfiction.net/s/5192986/5/. +
    fictionpress.com +
    Use the URL of any story chapter, such as +
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
    http://www.fictionpress.com/s/2847338/6/. +
    twilighted.net +
    Use the URL of the start of the story, such as +
    http://twilighted.net/viewstory.php?sid=8422. +
    ficwad.com +
    Use the URL of any story chapter, such as +
    http://www.ficwad.com/story/75246. +
    harrypotterfanfiction.com +
    Use the URL of the story's chapter list, such as +
    http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
    potionsandsnitches.net +
    Use the URL of the story's chapter list, such as +
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
    mediaminer.org +
    Use the URL of the story's chapter list, such as +
    http://www.mediaminer.org/fanfic/view_st.php/166653. + Or the story URL for one-shots, such as +
    http://www.mediaminer.org/fanfic/view_st.php/167618. +
    + + + A few additional things to know, which will make your life substantially easier:
    1. First thing to know: I do not use your login and password. In fact, all I know about it is your ID – password @@ -111,20 +134,12 @@ — how to read fiction in Stanza or any other ebook reader.
    2. - Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, and twilighted.net. - (fanficauthors.net withdrawn as they offer native ePub functionality now.) + Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, mediaminer.org and twilighted.net. + fanficauthors.net and tthfanfic.org offer native ePub functionality.
    3. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
    4. -
    5. - Paste a URL of the first chapter of the fanfic, not the index page, except for fictionalley.org. -
    6. -
    7. - For fictionalley.org, you need to use the URL of the story's chapter list, such as - this. Or the story text URL for fictionalley.org - one-shots, such as this. -
    8. One-shots, fics with a single chapter, are now supported.
    9. diff --git a/main.py b/main.py index 1ca7dcb2..1ae0ac99 100644 --- a/main.py +++ b/main.py @@ -184,12 +184,18 @@ class FanfictionDownloader(webapp.RequestHandler): adapter = fictionalley.FictionAlley(url) elif url.find('ficwad') != -1: adapter = ficwad.FicWad(url) - elif url.find('fanfiction.net') != -1 or url.find('fictionpress.com') != -1: + elif url.find('fanfiction.net') != -1: adapter = ffnet.FFNet(url) + elif url.find('fictionpress.com') != -1: + adapter = fpcom.FPCom(url) elif url.find('harrypotterfanfiction.com') != -1: adapter = hpfiction.HPFiction(url) - elif url.find('twilighted') != -1 or url.find('potionsandsnitches.net') != -1: + elif url.find('twilighted.net') != -1: adapter = twilighted.Twilighted(url) + elif url.find('potionsandsnitches.net') != -1: + adapter = potionsNsnitches.PotionsNSnitches(url) + elif url.find('mediaminer.org') != -1: + adapter = mediaminer.MediaMiner(url) else: logging.debug("Bad URL detected") self.redirect('/?error=bad_url&url=' + urlEscape(url) ) From cf0d1bf09be23668c6495c9e74c321d1a07177cb Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Mon, 22 Nov 2010 20:05:58 -0600 Subject: [PATCH 80/94] Change output names for web version, make html & text output work for CLI version. 
--- fanficdownloader/downloader.py | 12 ++++++++++++ fanficdownloader/output.py | 13 +++++++++++-- main.py | 5 +++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index cb4ca7d4..17078430 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -62,6 +62,13 @@ class FanficLoader: self.standAlone = sa return self.standAlone + def getOverWrite(self): + return self.overWrite + + def setOverWrite(self, sa): + self.overWrite = sa + return self.overWrite + def getAdapter(): return self.adapter @@ -153,6 +160,8 @@ if __name__ == '__main__': writerClass = output.EPubFanficWriter elif bookFormat == 'html': writerClass = output.HTMLWriter + elif bookFormat == 'text': + writerClass = output.TextWriter if adapter.requiresLogin(url): print("Meow, URL %s requires you to haz been logged in! Please can I haz this datas?" % url) @@ -167,6 +176,9 @@ if __name__ == '__main__': loader = FanficLoader(adapter, writerClass) loader.setStandAlone(True) + if bookFormat != 'epub': + loader.setOverWrite(True) + try: loader.download() diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index af6788ce..ff5daa51 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -60,6 +60,7 @@ class TextWriter(FanficWriter): return '.txt' def __init__(self, base, adapter, inmemory=False, compress=False): + self.inmemory = inmemory self.htmlWriter = HTMLWriter(base, adapter, True, False) def writeChapter(self, index, title, text): @@ -67,9 +68,17 @@ class TextWriter(FanficWriter): def finalise(self): self.htmlWriter.finalise() - self.output = StringIO.StringIO() + self.name=self.htmlWriter.name + self.fileName = self.htmlWriter.fileName.replace(".html",".txt") + if self.inmemory: + self.output = StringIO.StringIO() + else: + self.output = open(self.fileName, 'w') + 
self.output.write(html2text.html2text(self.htmlWriter.output.getvalue().decode('utf-8')).encode('utf-8')) - self.name = self.htmlWriter.name + + if not self.inmemory: + self.output.close() class HTMLWriter(FanficWriter): diff --git a/main.py b/main.py index 1ae0ac99..9a9cbf31 100644 --- a/main.py +++ b/main.py @@ -230,7 +230,8 @@ class FanfictionDownloader(webapp.RequestHandler): ext = '.html' if format == 'text': ext = '.txt' - files = {makeAcceptableFilename(str(adapter.getStoryName())) + ext : StringIO.StringIO(data.decode('utf-8')) } + logging.debug(data) + files = {makeAcceptableFilename(str(adapter.getOutputName())) + ext : StringIO.StringIO(data.decode('utf-8')) } d = inMemoryZip(files) data = d.getvalue() @@ -265,7 +266,7 @@ class FanfictionDownloader(webapp.RequestHandler): fic.user = user fic.url = url fic.format = format - fic.name = self._printableVersion(adapter.getStoryName()) + fic.name = self._printableVersion(adapter.getOutputName()) fic.author = self._printableVersion(adapter.getAuthorName()) fic.blob = data From ebcce7e42d6b98581f69152caebfdcb176af2743 Mon Sep 17 00:00:00 2001 From: sigizmund Date: Tue, 23 Nov 2010 07:15:18 +0000 Subject: [PATCH 81/94] Adding Mobi format and making final configuration changes before uploading a beta version. 
--- app.yaml | 8 +- fanficdownloader/downloader.py | 2 + fanficdownloader/html.py | 121 ++++++++++++ fanficdownloader/mobi.py | 344 +++++++++++++++++++++++++++++++++ fanficdownloader/output.py | 67 ++++++- 5 files changed, 533 insertions(+), 9 deletions(-) create mode 100644 fanficdownloader/html.py create mode 100644 fanficdownloader/mobi.py diff --git a/app.yaml b/app.yaml index 6f1ccdc8..8709ef3a 100644 --- a/app.yaml +++ b/app.yaml @@ -1,13 +1,9 @@ application: fanfictionloader -version: 2-5-5 +version: 2-6-beta runtime: python api_version: 1 handlers: -- url: /generate_mock_data - script: mocks/generate_mock_data.py - login: admin - - url: /r3m0v3r script: utils/remover.py login: admin @@ -25,7 +21,5 @@ handlers: - url: /static static_dir: static - - url: /.* script: main.py - diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index 17078430..c31f1a4b 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -160,6 +160,8 @@ if __name__ == '__main__': writerClass = output.EPubFanficWriter elif bookFormat == 'html': writerClass = output.HTMLWriter + elif bookFormat == 'mobi': + writerClass = output.MobiWriter elif bookFormat == 'text': writerClass = output.TextWriter diff --git a/fanficdownloader/html.py b/fanficdownloader/html.py new file mode 100644 index 00000000..2c14a58d --- /dev/null +++ b/fanficdownloader/html.py @@ -0,0 +1,121 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + +import re +import sys +import StringIO +import urllib + +from BeautifulSoup import BeautifulSoup + +class HtmlProcessor: + WHITESPACE_RE = re.compile(r'\s') + # Look for + BAD_TAG_RE = re.compile(r'<[^>]+<', re.MULTILINE) + + def __init__(self, html, unfill=0): + self.unfill = unfill + html = self._ProcessRawHtml(html) + self._soup = BeautifulSoup(html) + if self._soup.title: + self.title = self._soup.title.contents[0] + else: + self.title = None + + def _ProcessRawHtml(self, html): + new_html, 
count = HtmlProcessor.BAD_TAG_RE.subn('<', html) + if count: + print >>sys.stderr, 'Replaced %d bad tags' % count + return new_html + + def _StubInternalAnchors(self): + '''Replace each internal anchor with a fixed-size filepos anchor. + + Looks for every anchor with and replaces that + with . Stores anchors in self._anchor_references''' + self._anchor_references = [] + anchor_num = 0 + for anchor in self._soup.findAll('a', href=re.compile('^#')): + self._anchor_references.append((anchor_num, anchor['href'])) + del anchor['href'] + anchor['filepos'] = '%.10d' % anchor_num + anchor_num += 1 + + def _ReplaceAnchorStubs(self): + # TODO: Browsers allow extra whitespace in the href names. + assembled_text = self._soup.prettify() + del self._soup # shouldn't touch this anymore + for anchor_num, original_ref in self._anchor_references: + ref = urllib.unquote(original_ref[1:]) # remove leading '#' + # Find the position of ref in the utf-8 document. + # TODO(chatham): Using regexes and looking for name= would be better. + newpos = assembled_text.rfind(ref.encode('utf-8')) + if newpos == -1: + print >>sys.stderr, 'Could not find anchor "%s"' % original_ref + continue + newpos += len(ref) + 2 # don't point into the middle of the tag + old_filepos = 'filepos="%.10d"' % anchor_num + new_filepos = 'filepos="%.10d"' % newpos + assert assembled_text.find(old_filepos) != -1 + assembled_text = assembled_text.replace(old_filepos, new_filepos, 1) + return assembled_text + + def _FixPreTags(self): + '''Replace
       tags with HTML-ified text.'''
      +    pres = self._soup.findAll('pre')
      +    for pre in pres:
      +      pre.replaceWith(self._FixPreContents(str(pre.contents[0])))
      +
      +  def _FixPreContents(self, text):
      +    if self.unfill:
      +      line_splitter = '\n\n'
      +      line_joiner = '

      ' + else: + line_splitter = '\n' + line_joiner = '
      ' + lines = [] + for line in text.split(line_splitter): + lines.append(self.WHITESPACE_RE.subn(' ', line)[0]) + return line_joiner.join(lines) + + def _RemoveUnsupported(self): + '''Remove any tags which the kindle cannot handle.''' + # TODO(chatham): tags to script? + unsupported_tags = ('script', 'style') + for tag_type in unsupported_tags: + for element in self._soup.findAll(tag_type): + element.extract() + + def RenameAnchors(self, prefix): + '''Rename every internal anchor to have the given prefix, then + return the contents of the body tag.''' + for anchor in self._soup.findAll('a', href=re.compile('^#')): + anchor['href'] = '#' + prefix + anchor['href'][1:] + for a in self._soup.findAll('a'): + if a.get('name'): + a['name'] = prefix + a['name'] + + # TODO(chatham): figure out how to fix this. sometimes body comes out + # as NoneType. + content = [] + if self._soup.body is not None: + content = [unicode(c) for c in self._soup.body.contents] + return '\n'.join(content) + + def CleanHtml(self): + # TODO(chatham): fix_html_br, fix_html + self._RemoveUnsupported() + self._StubInternalAnchors() + self._FixPreTags() + return self._ReplaceAnchorStubs() + + +if __name__ == '__main__': + FILE ='/tmp/documentation.html' + #FILE = '/tmp/multipre.html' + FILE = '/tmp/view.html' + import codecs + d = open(FILE).read() + h = HtmlProcessor(d) + s = h.CleanHtml() + #print s diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py new file mode 100644 index 00000000..4facb556 --- /dev/null +++ b/fanficdownloader/mobi.py @@ -0,0 +1,344 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + + +import StringIO +import struct +import time +import random +import logging + +from html import HtmlProcessor + +# http://wiki.mobileread.com/wiki/MOBI +# http://membres.lycos.fr/microfirst/palm/pdb.html + +encoding = { + 'UTF-8' : 65001, + 'latin-1' : 1252, +} + +languages = {"en-us" : 0x0409, + "sv" : 0x041d, + "fi" : 0x000b, + "en" : 0x0009, + 
"en-gb" : 0x0809} + +def ToHex(s): + v = ['%.2x' % ord(c) for c in s] + return ' '.join(v) + +class _SubEntry: + def __init__(self, pos, html_data): + self.pos = pos + self.html = HtmlProcessor(html_data) + self.title = self.html.title + self._name = 'mobi_article_%d' % pos + if not self.title: + self.title = 'Article %d' % self.pos + + def TocLink(self): + return '
      %.80s' % (self._name, self.title) + + def Anchor(self): + return '' % self._name + + def Body(self): + return self.html.RenameAnchors(self._name + '_') + +class Converter: + def __init__(self, refresh_url=''): + self._header = Header() + self._refresh_url = refresh_url + + def ConvertString(self, s): + out = StringIO.StringIO() + self._ConvertStringToFile(s, out) + return out.getvalue() + + def ConvertStrings(self, html_strs): + out = StringIO.StringIO() + self._ConvertStringsToFile(html_strs, out) + return out.getvalue() + + def ConvertFile(self, html_file, out_file): + self._ConvertStringToFile(open(html_file).read(), + open(out_file, 'w')) + + def ConvertFiles(self, html_files, out_file): + html_strs = [open(f).read() for f in html_files] + self._ConvertStringsToFile(html_strs, open(out_file, 'w')) + + def MakeOneHTML(self, html_strs): + """This takes a list of HTML strings and returns a big HTML file with + all contents consolidated. It constructs a table of contents and adds + anchors within the text + """ + toc_html = [] + if self._refresh_url: + toc_html.append('Update Reading List
      ' % + self._refresh_url) + body_html = [] + titles = [] + + PAGE_BREAK = '' + for pos, html in enumerate(html_strs): + entry = _SubEntry(pos+1, html) + titles.append(entry.title[:10]) + toc_html.append('%s
      ' % entry.TocLink()) + + # give some space between bodies of work. + body_html.append(PAGE_BREAK) + body_html.append(entry.Anchor()) + + body_html.append('

      %s

      ' % entry.title) + body_html.append(entry.Body()) + + # TODO: this title can get way too long with RSS feeds. Not sure how to fix + header = 'Bibliorize %s GMT' % time.ctime( + time.time()) + + footer = '' + all_html = header + '\n'.join(toc_html + body_html) + footer + return all_html + + def _ConvertStringsToFile(self, html_strs, out_file): + try: + tmp = self.MakeOneHTML(html_strs) + self._ConvertStringToFile(tmp, out_file) + except Exception, e: + logging.error('Error %s', e) + logging.debug('Details: %s' % html_strs) + + def _ConvertStringToFile(self, html_data, out): + html = HtmlProcessor(html_data) + data = html.CleanHtml() + records = [] + title = html.title + if title: + self._header.SetTitle(title) + record_id = 1 + for start_pos in range(0, len(data), Record.MAX_SIZE): + end = min(len(data), start_pos + Record.MAX_SIZE) + record_data = data[start_pos:end] + records.append(self._header.AddRecord(record_data, record_id)) + record_id += 1 + self._header.SetImageRecordIndex(record_id) + records[0:0] = [self._header.MobiHeader()] + + header, rec_offset = self._header.PDBHeader(len(records)) + out.write(header) + for record in records: + record.WriteHeader(out, rec_offset) + rec_offset += len(record.data) + + # Write to nuls for some reason + out.write('\0\0') + for record in records: + record.WriteData(out) + +class Record: + MAX_SIZE = 4096 + INDEX_LEN = 8 + _unique_id_seed = 28 # should be arbitrary, but taken from MobiHeader + + # TODO(chatham): Record compression doesn't look that hard. 
+ + def __init__(self, data, record_id): + assert len(data) <= self.MAX_SIZE + self.data = data + if record_id != 0: + self._id = record_id + else: + Record._unique_id_seed += 1 + self._id = 0 + + def __repr__(self): + return 'Record: id=%d len=%d' % (self._id, len(self.data)) + + def _SetUniqueId(self): + Record._unique_id_seed += 1 + # TODO(chatham): Wraparound crap + self._id = Record._unique_id_seed + + def WriteData(self, out): + out.write(self.data) + + def WriteHeader(self, out, rec_offset): + attributes = 64 # dirty? + header = struct.pack('>IbbH', + rec_offset, + attributes, + 0, self._id) + assert len(header) == Record.INDEX_LEN + out.write(header) + +EXTH_HEADER_FIELDS = { + 'author' : 100, + 'publisher' : 101, +} + +class Header: + EPOCH_1904 = 2082844800 + + def __init__(self): + self._length = 0 + self._record_count = 0 + self._title = '2008_2_34' + self._author = 'Unknown author' + self._publisher = 'Unknown publisher' + self._first_image_index = 0 + + def SetAuthor(self, author): + self._author = author + + def SetTitle(self, title): + # TODO(chatham): Reevaluate whether this needs to be ASCII. + # maybe just do sys.setdefaultencoding('utf-8')? Problems + # appending self._title with other things. 
+ self._title = title.encode('ascii') + + def SetPublisher(self, publisher): + self._publisher = publisher + + def AddRecord(self, data, record_id): + self.max_record_size = max(Record.MAX_SIZE, len(data)) + self._record_count += 1 + self._length += len(data) + return Record(data, record_id) + + def _ReplaceWord(self, data, pos, word): + return data[:pos] + struct.pack('>I', word) + data[pos+4:] + + def PalmDocHeader(self): + compression = 1 # no compression + unused = 0 + encryption_type = 0 # no ecryption + records = self._record_count + 1 # the header record itself + palmdoc_header = struct.pack('>HHIHHHH', + compression, + unused, + self._length, + records, + Record.MAX_SIZE, + encryption_type, + unused) + assert len(palmdoc_header) == 16 + return palmdoc_header + + def PDBHeader(self, num_records): + HEADER_LEN = 32+2+2+9*4 + RECORD_INDEX_HEADER_LEN = 6 + RESOURCE_INDEX_LEN = 10 + + index_len = RECORD_INDEX_HEADER_LEN + num_records * Record.INDEX_LEN + rec_offset = HEADER_LEN + index_len + 2 + + short_title = self._title[0:31] + attributes = 0 + version = 0 + ctime = self.EPOCH_1904 + int(time.time()) + mtime = self.EPOCH_1904 + int(time.time()) + backup_time = self.EPOCH_1904 + int(time.time()) + modnum = 0 + appinfo_offset = 0 + sort_offset = 0 + type = 'BOOK' + creator = 'MOBI' + id_seed = 36 + header = struct.pack('>32sHHII', + short_title, attributes, version, + ctime, mtime) + header += struct.pack('>IIII', backup_time, modnum, + appinfo_offset, sort_offset) + header += struct.pack('>4s4sI', + type, creator, id_seed) + next_record = 0 # not used? + header += struct.pack('>IH', next_record, num_records) + return header, rec_offset + + def _GetExthHeader(self): + # They set author, publisher, coveroffset, thumboffset + data = {'author' : self._author, + 'publisher' : self._publisher, + } + # Turn string type names into EXTH typeids. 
+ r = [] + for key, value in data.items(): + typeid = EXTH_HEADER_FIELDS[key] + length_encoding_len = 8 + r.append(struct.pack('>LL', typeid, len(value) + length_encoding_len,) + value) + content = ''.join(r) + + # Pad to word boundary + while len(content) % 4: + content += '\0' + TODO_mysterious = 12 + exth = 'EXTH' + struct.pack('>LL', len(content) + TODO_mysterious, len(data)) + content + return exth + + def SetImageRecordIndex(self, idx): + self._first_image_index = idx + + def MobiHeader(self): + exth_header = self._GetExthHeader(); + palmdoc_header = self.PalmDocHeader() + + fs = 0xffffffff + + # Record 0 + header_len = 0xE4 # TODO + mobi_type = 2 # BOOK + text_encoding = encoding['UTF-8'] + unique_id = random.randint(1, 1<<32) + creator_version = 4 + reserved = '%c' % 0xff * 40 + nonbook_index = fs + full_name_offset = header_len + len(palmdoc_header) + len(exth_header) # put full name after header + language = languages['en-us'] + unused = 0 + mobi_header = struct.pack('>4sIIIII40sIIIIII', + 'MOBI', + header_len, + mobi_type, + text_encoding, + unique_id, + creator_version, + reserved, + nonbook_index, + full_name_offset, + len(self._title), + language, + fs, fs) + assert len(mobi_header) == 104 - 16 + + unknown_fields = chr(0) * 32 + drm_offset = 0 + drm_count = 0 + drm_size = 0 + drm_flags = 0 + exth_flags = 0x50 + header_end = chr(0) * 64 + mobi_header += struct.pack('>IIIIIII', + creator_version, + self._first_image_index, + fs, + unused, + fs, + unused, + exth_flags) + mobi_header += '\0' * 112 # TODO: Why this much padding? + # Set some magic offsets to be 0xFFFFFFF. + for pos in (0x94, 0x98, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xdc): + mobi_header = self._ReplaceWord(mobi_header, pos, fs) + + # 16 bytes? + padding = '\0' * 48 * 4 # why? 
+ total_header = palmdoc_header + mobi_header + exth_header + self._title + padding + + return self.AddRecord(total_header, 0) + +if __name__ == '__main__': + import sys + m = Converter() + m.ConvertFiles(sys.argv[1:], '/tmp/test.mobi') \ No newline at end of file diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index ff5daa51..10fb6198 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -21,6 +21,7 @@ import urlparse as up import BeautifulSoup as bs import htmlentitydefs as hdefs +import mobi import zipdir import html_constants from constants import * @@ -81,6 +82,68 @@ class TextWriter(FanficWriter): self.output.close() +class MobiWriter(FanficWriter): + body = '' + + @staticmethod + def getFormatName(): + return 'mobi' + + @staticmethod + def getFormatExt(): + return '.mobi' + + def __init__(self, base, adapter, inmemory=False, compress=False): + self.basePath = base + self.storyTitle = removeEntities(adapter.getStoryName()) + self.name = makeAcceptableFilename(adapter.getOutputName()) + self.fileName = self.basePath + '/' + self.name + self.getFormatExt() + self.authorName = removeEntities(adapter.getAuthorName()) + self.adapter = adapter + self.mobi = mobi + self.inmemory = inmemory + + if not self.inmemory and os.path.exists(self.fileName): + os.remove(self.fileName) + + if self.inmemory: + self.output = StringIO.StringIO() + else: + self.output = open(self.fileName, 'w') + + self.xhtmlTemplate = string.Template(html_constants.XHTML_START) + self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START) + + def _printableVersion(self, text): + try: + d = text.decode('utf-8') + return d + except: + return text + + def writeChapter(self, index, title, text): + title = self._printableVersion(title) #title.decode('utf-8') + text = self._printableVersion(text) #text.decode('utf-8') + self.body = self.body + '\n' + self.chapterStartTemplate.substitute({'chapter' : title}) + self.body = self.body + '\n' + 
text + + def finalise(self): + html = self.xhtmlTemplate.substitute({'title' : self.storyTitle, 'author' : self.authorName, 'body' : self.body}) + soup = bs.BeautifulSoup(html) + result = soup.__str__('utf8') + +# f = open(self.fileName, 'w') +# f.write(result) +# f.close() + + c = mobi.Converter() + mobidata = c.ConvertString(result) + + self.output.write(mobidata) + if not self.inmemory: + self.output.close() + + class HTMLWriter(FanficWriter): body = '' @@ -92,14 +155,14 @@ class HTMLWriter(FanficWriter): def getFormatExt(): return '.html' - def __init__(self, base, adapter, inmemory=False, compress=False): + def __init__(self, base, adapter, inmemory=False, compress=False, mobi = False): self.basePath = base self.storyTitle = removeEntities(adapter.getStoryName()) self.name = makeAcceptableFilename(adapter.getOutputName()) self.fileName = self.basePath + '/' + self.name + self.getFormatExt() self.authorName = removeEntities(adapter.getAuthorName()) self.adapter = adapter - + self.mobi = mobi self.inmemory = inmemory if not self.inmemory and os.path.exists(self.fileName): From f3571959df98fdb237e192863d1b256bca00d0b6 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Tue, 23 Nov 2010 12:42:33 -0600 Subject: [PATCH 82/94] Change a couple of the example story URLs. 
--- app.yaml | 31 + cron.yaml | 4 + css/index.css | 71 + delete_fic.py | 59 + fanficdownloader/BeautifulSoup.py | 2014 ++++++++ fanficdownloader/__init__.py | 1 + fanficdownloader/adapter.py | 229 + fanficdownloader/books/place holder.txt | 0 fanficdownloader/constants.py | 542 ++ fanficdownloader/downloader.py | 205 + fanficdownloader/ffnet.py | 358 ++ fanficdownloader/fictionalley.py | 301 ++ fanficdownloader/ficwad.py | 267 + fanficdownloader/fpcom.py | 344 ++ fanficdownloader/hpfiction.py | 280 ++ fanficdownloader/html2text.py | 452 ++ fanficdownloader/html_constants.py | 19 + fanficdownloader/mediaminer.py | 406 ++ fanficdownloader/output.py | 424 ++ fanficdownloader/potionsNsnitches.py | 367 ++ fanficdownloader/readme.txt | 10 + fanficdownloader/twilighted.py | 316 ++ fanficdownloader/twipassword.py | 4 + fanficdownloader/zipdir.py | 177 + ffstorage.py | 21 + index-ajax.html | 109 + index.html | 204 + index.yaml | 22 + js/fdownloader.js | 116 + js/jquery-1.3.2.js | 4376 +++++++++++++++++ main.py | 316 ++ queue.yaml | 5 + recent.html | 69 + simplejson/__init__.py | 318 ++ simplejson/__init__.pyc | Bin 0 -> 12071 bytes simplejson/_speedups.c | 2329 +++++++++ simplejson/decoder.py | 354 ++ simplejson/decoder.pyc | Bin 0 -> 11292 bytes simplejson/encoder.py | 440 ++ simplejson/encoder.pyc | Bin 0 -> 13938 bytes simplejson/scanner.py | 65 + simplejson/scanner.pyc | Bin 0 -> 2340 bytes simplejson/tests/__init__.py | 23 + simplejson/tests/test_check_circular.py | 30 + simplejson/tests/test_decode.py | 22 + simplejson/tests/test_default.py | 9 + simplejson/tests/test_dump.py | 21 + .../tests/test_encode_basestring_ascii.py | 38 + simplejson/tests/test_fail.py | 76 + simplejson/tests/test_float.py | 15 + simplejson/tests/test_indent.py | 41 + simplejson/tests/test_pass1.py | 76 + simplejson/tests/test_pass2.py | 14 + simplejson/tests/test_pass3.py | 20 + simplejson/tests/test_recursion.py | 67 + simplejson/tests/test_scanstring.py | 111 + 
simplejson/tests/test_separators.py | 42 + simplejson/tests/test_unicode.py | 64 + simplejson/tool.py | 37 + static/ajax-loader.gif | Bin 0 -> 10819 bytes static/favicon.ico | Bin 0 -> 21792 bytes utils/remover.py | 53 + 62 files changed, 16384 insertions(+) create mode 100644 app.yaml create mode 100644 cron.yaml create mode 100644 css/index.css create mode 100644 delete_fic.py create mode 100644 fanficdownloader/BeautifulSoup.py create mode 100644 fanficdownloader/__init__.py create mode 100644 fanficdownloader/adapter.py create mode 100644 fanficdownloader/books/place holder.txt create mode 100644 fanficdownloader/constants.py create mode 100644 fanficdownloader/downloader.py create mode 100644 fanficdownloader/ffnet.py create mode 100644 fanficdownloader/fictionalley.py create mode 100644 fanficdownloader/ficwad.py create mode 100644 fanficdownloader/fpcom.py create mode 100644 fanficdownloader/hpfiction.py create mode 100644 fanficdownloader/html2text.py create mode 100644 fanficdownloader/html_constants.py create mode 100644 fanficdownloader/mediaminer.py create mode 100644 fanficdownloader/output.py create mode 100644 fanficdownloader/potionsNsnitches.py create mode 100644 fanficdownloader/readme.txt create mode 100644 fanficdownloader/twilighted.py create mode 100644 fanficdownloader/twipassword.py create mode 100644 fanficdownloader/zipdir.py create mode 100644 ffstorage.py create mode 100644 index-ajax.html create mode 100644 index.html create mode 100644 index.yaml create mode 100644 js/fdownloader.js create mode 100644 js/jquery-1.3.2.js create mode 100644 main.py create mode 100644 queue.yaml create mode 100644 recent.html create mode 100644 simplejson/__init__.py create mode 100644 simplejson/__init__.pyc create mode 100644 simplejson/_speedups.c create mode 100644 simplejson/decoder.py create mode 100644 simplejson/decoder.pyc create mode 100644 simplejson/encoder.py create mode 100644 simplejson/encoder.pyc create mode 100644 simplejson/scanner.py 
create mode 100644 simplejson/scanner.pyc create mode 100644 simplejson/tests/__init__.py create mode 100644 simplejson/tests/test_check_circular.py create mode 100644 simplejson/tests/test_decode.py create mode 100644 simplejson/tests/test_default.py create mode 100644 simplejson/tests/test_dump.py create mode 100644 simplejson/tests/test_encode_basestring_ascii.py create mode 100644 simplejson/tests/test_fail.py create mode 100644 simplejson/tests/test_float.py create mode 100644 simplejson/tests/test_indent.py create mode 100644 simplejson/tests/test_pass1.py create mode 100644 simplejson/tests/test_pass2.py create mode 100644 simplejson/tests/test_pass3.py create mode 100644 simplejson/tests/test_recursion.py create mode 100644 simplejson/tests/test_scanstring.py create mode 100644 simplejson/tests/test_separators.py create mode 100644 simplejson/tests/test_unicode.py create mode 100644 simplejson/tool.py create mode 100644 static/ajax-loader.gif create mode 100644 static/favicon.ico create mode 100644 utils/remover.py diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..6f1ccdc8 --- /dev/null +++ b/app.yaml @@ -0,0 +1,31 @@ +application: fanfictionloader +version: 2-5-5 +runtime: python +api_version: 1 + +handlers: +- url: /generate_mock_data + script: mocks/generate_mock_data.py + login: admin + +- url: /r3m0v3r + script: utils/remover.py + login: admin + +- url: /r3m0v3r + script: main.py + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + + +- url: /.* + script: main.py + diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..1d9c70a0 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,4 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 3 hours \ No newline at end of file diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..f4aec452 --- /dev/null +++ b/css/index.css @@ -0,0 +1,71 @@ +body +{ + font: 0.9em "Helvetica 
Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 43%; + margin-left: 23%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ + margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover +{ + border: thin solid #fffeff; +} + +h1 +{ + text-decoration: none; +} + +#logpasswordtable +{ + padding: 1em; +} + +#logpassword, #logpasswordtable { + display: none; +} + +#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile +{ + margin: 1em; + padding: 1em; + border: thin dotted #fffeff; +} + +div.field +{ + margin-bottom: 0.5em; +} + +#submitbtn +{ + padding: 1em; +} + +#typelabel +{ +} + +#typeoptions +{ + margin-top: 0.5em; +} + +#error +{ + font-size: small; + color: #f00; +} \ No newline at end of file diff --git a/delete_fic.py b/delete_fic.py new file mode 100644 index 00000000..73722724 --- /dev/null +++ b/delete_fic.py @@ -0,0 +1,59 @@ +import os +import cgi +import sys +import logging +import traceback +import StringIO + +from google.appengine.api import users +from google.appengine.ext import webapp +from google.appengine.ext.webapp import util + +from fanficdownloader.downaloder import * +from fanficdownloader.ffnet import * +from fanficdownloader.output import * + +from google.appengine.ext import db + +from fanficdownloader.zipdir import * + +from ffstorage import * + +def create_mac(user, fic_id, fic_url): + return str(abs(hash(user)+hash(fic_id)))+str(abs(hash(fic_url))) + +def check_mac(user, fic_id, fic_url, mac): + return (create_mac(user, fic_id, fic_url) == mac) + +def create_mac_for_fic(user, fic_id): + key = db.Key(fic_id) + fanfic = db.get(key) + if fanfic.user != user: + return None + else: + return create_mac(user, key, fanfic.url) + +class DeleteFicHandler(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect('/login') + + fic_id = 
self.request.get('fic_id') + fic_mac = self.request.get('key_id') + + actual_mac = create_mac_for_fic(user, fic_id) + if actual_mac != fic_mac: + self.response.out.write("Ooops") + else: + key = db.Key(fic_id) + fanfic = db.get(key) + fanfic.delete() + self.redirect('/recent') + + + fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1", user) + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + \ No newline at end of file diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py new file mode 100644 index 00000000..31ff0e5f --- /dev/null +++ b/fanficdownloader/BeautifulSoup.py @@ -0,0 +1,2014 @@ +# -*- coding: utf-8 -*- + +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. 
+ + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.0.8.1" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. 
+ +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. 
+ lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. 
+ position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches 
the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. 
+ def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. 
If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. 
+ # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + 
current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. 
+class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. 
+ elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. + elif isinstance(markup, NavigableString) or \ + isinstance(markup, basestring): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst is True: + result = markup is not None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup and not isinstance(markup, basestring): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif hasattr(matchAgainst, '__iter__'): # list-like + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isinstance(markup, basestring): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. 
+ for k,v in portion.items(): + built[k] = v + elif hasattr(portion, '__iter__'): # is a list + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile(']*)>'), + lambda x: '') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. + STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. 
+ + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
      (No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. 
+ markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = 
[] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. 
If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

      FooBar *

      * should pop to 'p', not 'b'. +

      FooBar *

      * should pop to 'table', not 'p'. +

      Foo

      Bar *

      * should pop to 'tr', not 'p'. + +

      • *
      • * should pop to 'ul', not the first 'li'. +
    10. ** should pop to 'table', not the first 'tr' + tag should + implicitly close the previous tag within the same
      ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers is not None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers is None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. 
+ #print " is not real!" % name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". 
When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

      tag should implicitly close the previous

      tag. + +

      Para1

      Para2 + should be transformed into: +

      Para1

      Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a

      tag should _not_ implicitly close the previous +
      tag. + + Alice said:
      Bob said:
      Blah + should NOT be transformed into: + Alice said:
      Bob said:
      Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a
      , + but not close a tag in another table. + +
      BlahBlah + should be transformed into: +
      BlahBlah + but, + Blah
      Blah + should NOT be transformed into + Blah
      Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. 
+ RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. 
+ newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. 
This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + + + + + + + + +
      +

      + FanFiction Downloader +

      + + +
      +
      + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
      + +
      + Ebook format   +
      + +
      + +
      + + + +
      + + + +
      +
      + +

      + Login and Password +

      +
      + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide your credentials to download it, otherwise just leave it empty +
      +
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      +
      +
      + + +
      + + +
      + +
      +
      + A few things to know, which will make your life substantially easier: +
        +
      1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
      2. +
      3. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com
      4. +
      5. Paste a URL of the first chapter of the fanfic, not the index page
      6. +
      7. Fics with a single chapter are not supported (you can just copy and paste it)
      8. +
      9. Stories which are too long may not be downloaded correctly, and the application will report a time-out error — this is a limitation currently imposed by Google App Engine on long-running activities
      10. +
      11. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me
      12. +
      13. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section, but in the future they will be deleted after 5 days to save space
      14. +
      15. If Downloader simply opens a download file window rather than saving the fanfic and giving you a link, it means the file is too large to save in the database and you need to download it straight away
      16. +
      17. If you think that something that should work in fact doesn't, drop me a mail to sigizmund@gmail.com
      18. +
      + Otherwise, just have fun, and if you want to say thank you — use the email above. +
      +
      + Powered by Google App Engine +

      + FanfictionLoader is a web front-end to fanficdownloader
      + Copyright © Roman Kirillov +
      + +
      + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..4ee35c4f --- /dev/null +++ b/index.html @@ -0,0 +1,204 @@ + + + + + Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org to epub and HTML to Stanza, Kindle, Nook, Sony Reader + + + + +
      +

      + FanFiction Downloader +

      + +
      + + +
      + + {{yourfile}} + + + {% if authorized %} + +
      +
      +

      Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier.

      +

      To support new features, such as including story summaries, + the URL you need to use for some sites has changed. See below for example URLs for each site.

      +

      Or see your personal list of previously downloaded fanfics.

      +
      +
      + {{ error_message }} +
      + +
      + +
      +
      Ebook format
      +
      + EPub + HTML + Plain Text +
      +
      + +
      +

      Login and Password

      +
      + + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide + your credentials to download it, otherwise just leave it empty +
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      +
      + +
      + +
      + + {% else %} +
      +
      +

      + This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you + can start downloading fanfics, you need to login, so downloader can remember your fanfics and store them. +

      +

      Login using Google account

      +
      +
      + {% endif %} + +
      +
      +
      fictionalley.org +
      Use the URL of the story's chapter list, such as +
      http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for + fictionalley.org one-shots, such as +
      http://www.fictionalley.org/authors/drt/JOTP01a.html. +
      fanfiction.net +
      Use the URL of any story chapter, with or without story title such as +
      http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
      http://www.fanfiction.net/s/5192986/5/. +
      fictionpress.com +
      Use the URL of any story chapter, such as +
      http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
      http://www.fictionpress.com/s/2847338/6/. +
      twilighted.net +
      Use the URL of the start of the story, such as +
      http://twilighted.net/viewstory.php?sid=8422. +
      ficwad.com +
      Use the URL of any story chapter, such as +
      http://www.ficwad.com/story/75246. +
      harrypotterfanfiction.com +
      Use the URL of the story's chapter list, such as +
      http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
      potionsandsnitches.net +
      Use the URL of the story's chapter list, such as +
      http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
      mediaminer.org +
      Use the URL of the story's chapter list, such as +
      http://www.mediaminer.org/fanfic/view_st.php/166653. + Or the story URL for one-shots, such as +
      http://www.mediaminer.org/fanfic/view_st.php/167618. +
      + + + A few additional things to know, which will make your life substantially easier: +
        +
      1. + First thing to know: I do not use your login and password. In fact, all I know is your ID – your password + is verified by Google and is absolutely, totally unknown to anyone but you. +
      2. +
      3. + Small post written by me + — how to read fiction in Stanza or any other ebook reader. +
      4. +
      5. + Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, mediaminer.org and twilighted.net. + fanficauthors.net and tthfanfic.org offer native ePub functionality. +
      6. +
      7. + You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader. +
      8. +
      9. + One-shots, fics with a single chapter, are now supported. +
      10. +
      11. + You can download fanfics and store them for 'later' by just downloading them and visiting recent + downloads section. +
      12. +
      13. + Downloaded stories are deleted after some time (which should give you enough time to download them and will keep + Google happy about the app not going over the storage limit). +
      14. +
      15. + If Downloader simply opens a download file window rather than saves the fanfic and gives you a link, it means it is + too large to save in the database and you need to download it straight away. +
      16. +
      17. + If you see some funny characters in a downloaded Plain Text file, make sure you choose the UTF-8 text file encoding and + not something else. +
      18. +
      19. + If you think that something that should work in fact doesn't, drop me an email + to sigizmund@gmail.com, or, even better, write an email to + our Google Group. I also encourage you to join it so + you will find out about the latest updates and fixes as soon as possible. +
      20. +
      + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
      +
      + Powered by Google App Engine +

      + FanfictionLoader is a web front-end to fanficdownloader
      + Copyright © Roman Kirillov +
      + +
      + + +
      + +
      + + + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..bbed2dff --- /dev/null +++ b/index.yaml @@ -0,0 +1,22 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
      " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. 
Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. + undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? 
+ var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? 
+ + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." + name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? 
elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. 
+ push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). + var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, 
jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? 
index : 0, max = one ? index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? 
+ this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? 
[ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? 
+ jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? 
+ "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 
&& typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? + all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "
      ", "
      " ] || + + !tags.indexOf("", "" ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + + + + + {{yourfile}} + + +

      +
      + Hi, {{ nickname }}! These are the fanfics you've downloaded previously. +
      +
      + +
      + {% for fic in fics %} +

      {{ fic.name }} by {{ fic.author }} ({{ fic.format }})
      {{ fic.url }}

      + {% endfor %} +
      + + + + + +
    + + + + diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 00000000..d5b4d399 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON 
object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(o) + " is not JSON serializable") + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito ' + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). 
      + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg.
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance with the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. 
+ + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(encoding)(fp)``, or simply decoded to a ``unicode`` + object and passed to ``loads()``. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. 
This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. 
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01003d4f81d37513d0f8a2a5fb857b8448ae2bd GIT binary patch literal 12071 zcmeHNL37+jc5aXoC52lM??!ES`^Va5uF#&l87#d{Ux!3 zn-|3n?q3p7OB|dN$$7DJUN}^KM;t7P z_xSsL5nU6}&*=IaadGDz>Vo#>kD8f3w86!7@%u|+hsD0O&EIev42oGpIC}l95x%f< zrIyx+#l_FX@3E|_XH|W`RXp2m??ckj2g^eIdi&8s>HRu*9&Cq2oR{*^@Rna)x_EB5coSj#}_YN%Byvr%iNvp!DC;7EE8?)~QTmG#@}@@5f9 z6~#tUrBx&Y>YT*;?9r*L2+zFO@cy?gJgjIku=it zI6O$yKw_vWQQDVVC9RKys3XiN4U*(oP6A929~HHpV;JbA9?3{Cv$KQAFtd$ioXRhc z%Q2d-`?tGtSe1<^-3qfw4jm7%gz{J(#^re0_!dvG>H9Gky|5|@m6pkIM~(yC((!&8 zkK!;$OPQ;J^_GT82GMie3ig%mO7&c&EIY&4m5$SWUR##amIR5s*P>;nyd(&aI#(*H zat-xANW(0m4#PmlVLi9Z*vB|lP;7`Fy|K}1N&LHe7p5`Ev!ayKJ)`|5?KCaejG}6i zYj4*bWtrQRFWg~JxEs>L@7E|l%u>~rYyL-Fx!yT>+Tp(LZX2!JXx&EZ_J-WW@7E}& zRg%=LpPoE*o00MYo5q9tX1w+uiP)p=M&`_o*Y~R2y=ra!<}J7G!=?7?JGgs$P20Wi zX!oKWVi{OuduV?H`aS7N4ITCm)Un=tTvW=8`=ZUYGp)JzNi&a8kxk@wiAC>kJ*qdN zE;p^>Ol~%eL#+kpb%I85+Dcc=8GuNYyJ!st{ z8`R+21;w0HWLzV4KEj*5=_9?0)o==6&jfOlQ&B&Q%x(N&GdP9*(Wn zUq*IYzTb{SY6J(`r$~{gBQFZe&IXU>`#zb1j7QS#*Y*9rOZJ0S^Npw>48JN;gr-K) zu8UKi(D6oxT{oTt`>wUMTDt9o`g&0QZTyAZ@)zxyDZmy>Y$yB_iAQM-mn0mQ>nE-; z+j;<_oc=h=4mPLjG|KnZe!2c^x(_z8K#vfXoH>s*e+|(CPC={w2y-hpZEGKgf_kw5 zox10_)Xj{;cKG@|d^=x8d&oUiy-yyN{pvo(2+o9CLPho6daF(~oY~7=H1kQxT{=iU z>DU~}TDwIMYb75a=juUGWQA9#yzsJ){H1IY#!0i%m?)4F+iWmQl#PrKF|T41LD$iD z4Rgbqf+{ID=htPF=mhd|eQ&^Hzw1I*0} zKSO$}^@JhP6u_e~QbSX{Pn 
zImHbun2iFO;RRRaXyw!L0$N!E4QW|4!u$CA3qJbK_FN%{Tkvir!S~ADQoIrCiB`{sg2xOP$a6!=E7X^)aNnkh(@~yZy zEC1+2@p4(*e|k-vTox}eHl$ppv7N8}UHlFc-}Xou`VUagwsjdw4hAsn0VoymI*xdT zzm?#61{TtB84N}_8hHWR@nGN|7C4YzXE0dN6%3+a+Z@G-T1nyqMGg)2+5rn8hqpC? zfZ6~ch6oyB3^AD$HBlUvxJ%Z7TR|y6_SjB#L0mQp7&_~?TDhn!KM>;Ms%#-y9n0V`o9<)gl;VvG->!xMNIJT&8OrK31S zV#Cg2TAWjiamz+40qitgN!31*BF_~DFV(&(?1|v%1w{dqSBaYNaI{&*QShWDYBHo8 zP`#(KQ5olx6D;f=%%CzsZY1&L=P8CFqGoQeDCqbjBPUCd{(&A4&}6C(nUncYz3~Kf zs%VmFqf?^11hbTeKu(~|n(#F8*cFond2oc2ep3Z-g$;?Aaz(gL;9?SVG_VUddlDT zC1Qh_Wrmp)aRm1QrC3e-_2OtFCJH&hh`f2d0Jyx!q)FkY*)?_G!TB`8{K!R=hk`JsN_&yRZ~ z?rVJX-_cd~s&mD0owjpr;j+_m-pBuC=lbFWvE+obaMD);`~D3DfTHCr1Ka{=8{-G) zFTfAb-wu9&wG6oX4GIlT`wWE!1KvadFwDTde?$Rj%=GyHg%+g0HxK)^8QnFKE$BOM zKp$=c^x@gYB11pGH9$XrK0rT!K7PnSLj&|-On^SdeY2p?0=hrLVzyrabg<4>0G)*n zb3V=Da&urA#E{vOXZ!2KWi~0oF}H~|HLjhaA9BhZU*72mP19r zQt2=t%t?EMM&#qDHV8qBGT71omI$hO zJ7ORE3JXl4EUbtMBAGy7#Xc@KYnjGDW+r$f&zuo%%I2a#Kg_mSYS;u)WQ`D7xsGCO zhW-VQ3JQ`+&JbF}pMc?|D{Fx2jCfER8b3M3F~Gyp02_%<=CFw+g@4kP=<>IQ4XJz3o7aY3LxcX7!n3JEHD|%d%5jwB8HTX zEc__%c=bX-Ozg8} z(jz|C!@(7t?B`QTUZF>?Se+8yyFvHpouVEQtG|dZqv`w?KAI*Wu3u%c*7z#&$?U5Z z;qxdDzQhT5qGj+@ra-i8u`UoT4}yxX`%wrGH@r;hiKV_*U_?O7)#3*3@tNb zToTz8bOtp81#`q5CURyFTLr#`ss&qRCS$8W;!w3{tITX{l}YEHvsIV^+B($yTj4a> zeCv1r15|z9?^4=;PYQDdI_@(^yGhc_xLdeYvR*c?H*>e(A@;08wi%}3LgX;s(C_ky z?0aG6ukpKY=5|>Uxn9oJ{}8v!lIn*G*6p71x-)KbR1tZ z^&43~xy3qq%(yFxO?cO#37Xx8d7p|GoZz-70r3Kfq2ky+mZD@i0R=d2yGy?Of&v{s z%Z4q%GRZibgf<%Uj&qvbODkk)%YuUonw<&(2nDbNW3Ti^Bjxbuc<~wxe4ytKoKIzW zb!@;?=+y)t{+?e^XpE$B&OgH1;o{$F<>>b#t{c)QqhGI_)ldh)!C*f8yxF2D<%(TK z95fr1(KBBHZN%B}NOwc7)DIrIa(ab_6!m+9=3Ny|TbZOd$NE#7YjVhNLh2|~d``(2 z;}kidI5K+*>!61ZjfWsqRnbeW0C>ip|3YD&g8S+COF7<$Uc*!7Q@h6Y3+656+B|Nj zxQZvvh_L?;(VcL{6%4Nb0T-g}Iyao_j^Qbnk))mdJoMf}6MjbD?;|Aj`;2Y+eVhMB znMg;!4+o8F;##<_kZ~_;mDK<*o7*x3R>d^E{VRGF%eaSL37V2UP9XP4Nj;iqufa$j zNvlbN??bUIMMVdWXMnUth%ajb-P5 E0OpUxq5uE@ literal 0 HcmV?d00001 diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 
00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; 
+ PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *skipkeys; + int fast_encode; + int allow_nan; +} PyEncoderObject; + +static PyMemberDef encoder_members[] = { + {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, + {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, + {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, + {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, + {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, + {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + {NULL} +}; + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); +void init_speedups(void); +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +scanner_dealloc(PyObject *self); +static int +scanner_clear(PyObject *self); +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +encoder_dealloc(PyObject *self); 
+static int +encoder_clear(PyObject *self); +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *const); +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) + +#define MIN_EXPANSION 6 +#ifdef Py_UNICODE_WIDE +#define MAX_EXPANSION (2 * MIN_EXPANSION) +#else +#define MAX_EXPANSION MIN_EXPANSION +#endif + +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) +{ + /* PyObject to Py_ssize_t converter */ + *size_ptr = PyInt_AsSsize_t(o); + if (*size_ptr == -1 && PyErr_Occurred()); + return 1; + return 0; +} + +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. 
output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + 
Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + 
input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + 
return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if (pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. 
+ encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': 
c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + /* declaration precedes statements (C89), matching the first hex loop */ + Py_UNICODE digit = buf[next]; + c2 <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + 
raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = 
PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + /* declaration precedes statements (C89), matching the first hex loop */ + Py_UNICODE digit = buf[next]; + c2 <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': 
case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." 
+);
+
+static PyObject *
+py_scanstring(PyObject* self UNUSED, PyObject *args)
+{
+    /* Python-level entry point for scanstring().
+       Parses (pystr, end[, encoding[, strict]]), dispatches on the concrete
+       type of pystr to scanstring_str / scanstring_unicode, and hands the
+       result plus the end index to _build_rval_index_tuple.
+       Raises TypeError when pystr is neither a str nor a unicode object. */
+    PyObject *pystr;
+    PyObject *rval;
+    Py_ssize_t end;
+    Py_ssize_t next_end = -1;
+    char *encoding = NULL;
+    int strict = 1;
+    if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
+        return NULL;
+    }
+    /* fall back to the module default when no encoding was supplied */
+    if (encoding == NULL) {
+        encoding = DEFAULT_ENCODING;
+    }
+    if (PyString_Check(pystr)) {
+        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
+    }
+    else if (PyUnicode_Check(pystr)) {
+        rval = scanstring_unicode(pystr, end, strict, &next_end);
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+    return _build_rval_index_tuple(rval, next_end);
+}
+
+PyDoc_STRVAR(pydoc_encode_basestring_ascii,
+    "encode_basestring_ascii(basestring) -> str\n"
+    "\n"
+    "Return an ASCII-only JSON representation of a Python string"
+);
+
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
+{
+    /* Return an ASCII-only JSON representation of a Python string */
+    /* METH_O */
+    if (PyString_Check(pystr)) {
+        return ascii_escape_str(pystr);
+    }
+    else if (PyUnicode_Check(pystr)) {
+        return ascii_escape_unicode(pystr);
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+}
+
+static void
+scanner_dealloc(PyObject *self)
+{
+    /* Deallocate scanner object */
+    /* The Scanner type is declared with Py_TPFLAGS_HAVE_GC, so the object
+       must be removed from the collector's tracking list before any of its
+       fields are invalidated; otherwise a collection triggered while the
+       members are being released could traverse a half-cleared object. */
+    PyObject_GC_UnTrack(self);
+    scanner_clear(self);
+    Py_TYPE(self)->tp_free(self);
+}
+
+static int
+scanner_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    /* GC traversal hook: visit every object reference the scanner holds */
+    PyScannerObject *s;
+    assert(PyScanner_Check(self));
+    s = (PyScannerObject *)self;
+    Py_VISIT(s->encoding);
+    Py_VISIT(s->strict);
+    Py_VISIT(s->object_hook);
+    Py_VISIT(s->parse_float);
+    Py_VISIT(s->parse_int);
+    Py_VISIT(s->parse_constant);
+    return 0;
+}
+
+static int
+scanner_clear(PyObject *self)
+{
+    PyScannerObject *s;
+
assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip 
whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
+ + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + + /* if object_hook 
is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = 
idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON array from PyUnicode pystr.
+    idx is the index of the first character after the opening bracket.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing bracket.
+
+    Returns a new PyList, or NULL with an exception set on failure
+    (element scan failed, missing ',' delimiter, or missing closing ']').
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    /* end_idx is the index of the last character, not the length */
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && str[idx] != ']') {
+        while (idx <= end_idx) {
+
+            /* read any JSON term */
+            val = scan_once_unicode(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            /* the list now holds its own reference to the element */
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == ']') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx <= end_idx, str[idx] should be ']' */
+    if (idx > end_idx || str[idx] != ']') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON constant from PyString pystr.
+ constant is the constant string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. + + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' 
followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyString_FromStringAndSize(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); + } + } + else { + /* parse as an int using a fast path if available, otherwise call user defined method */ + if (s->parse_int != (PyObject *)&PyInt_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + else { + rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); + } + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyUnicode pystr. 
+ idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. 
+ */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. 
Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return 
_parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = 
(PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* 
PyObject_GenericSetAttr, */                    /* tp_setattro */
+    0,                    /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
+    scanner_doc,          /* tp_doc */
+    scanner_traverse,                    /* tp_traverse */
+    scanner_clear,                    /* tp_clear */
+    0,                    /* tp_richcompare */
+    0,                    /* tp_weaklistoffset */
+    0,                    /* tp_iter */
+    0,                    /* tp_iternext */
+    0,                    /* tp_methods */
+    scanner_members,                    /* tp_members */
+    0,                    /* tp_getset */
+    0,                    /* tp_base */
+    0,                    /* tp_dict */
+    0,                    /* tp_descr_get */
+    0,                    /* tp_descr_set */
+    0,                    /* tp_dictoffset */
+    scanner_init,                    /* tp_init */
+    0,/* PyType_GenericAlloc, */        /* tp_alloc */
+    scanner_new,          /* tp_new */
+    0,/* PyObject_GC_Del, */              /* tp_free */
+};
+
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    /* Allocate an Encoder with every object member set to NULL so the
+       object can be released safely even if encoder_init never succeeds */
+    PyEncoderObject *s;
+    s = (PyEncoderObject *)type->tp_alloc(type, 0);
+    if (s != NULL) {
+        s->markers = NULL;
+        s->defaultfn = NULL;
+        s->encoder = NULL;
+        s->indent = NULL;
+        s->key_separator = NULL;
+        s->item_separator = NULL;
+        s->sort_keys = NULL;
+        s->skipkeys = NULL;
+    }
+    return (PyObject *)s;
+}
+
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* initialize Encoder object
+       Takes nine required arguments (see kwlist) and stores a new reference
+       to each of the first eight on the object.
+       Returns 0 on success, -1 with an exception set on failure. */
+    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
+
+    PyEncoderObject *s;
+    /* Parse into locals first: the "O" format yields BORROWED references,
+       and a parse that fails part-way must not leave them in the object's
+       owned fields, where they would later be decref'd without ever having
+       been incref'd. */
+    PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
+    PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
+
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
+        &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan))
+        return -1;
+
+    /* all arguments parsed: take ownership of each one */
+    s->markers = markers;
+    s->defaultfn = defaultfn;
+    s->encoder = encoder;
+    s->indent = indent;
+    s->key_separator = key_separator;
+    s->item_separator = item_separator;
+    s->sort_keys = sort_keys;
+    s->skipkeys = skipkeys;
+    Py_INCREF(s->markers);
+    Py_INCREF(s->defaultfn);
+    Py_INCREF(s->encoder);
+    Py_INCREF(s->indent);
+    Py_INCREF(s->key_separator);
+    Py_INCREF(s->item_separator);
+    Py_INCREF(s->sort_keys);
+    Py_INCREF(s->skipkeys);
+    /* fast path: the supplied encoder is this module's own C function */
+    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) ==
(PyCFunction)py_encode_basestring_ascii);
+    s->allow_nan = PyObject_IsTrue(allow_nan);
+    return 0;
+}
+
+static PyObject *
+encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Python callable interface to encode_listencode_obj
+       Returns a new list of the encoded chunks, or NULL with an exception
+       set if argument parsing or encoding fails. */
+    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
+    PyObject *obj;
+    PyObject *rval;
+    Py_ssize_t indent_level;
+    PyEncoderObject *s;
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
+        &obj, _convertPyInt_AsSsize_t, &indent_level))
+        return NULL;
+    rval = PyList_New(0);
+    if (rval == NULL)
+        return NULL;
+    if (encoder_listencode_obj(s, rval, obj, indent_level)) {
+        Py_DECREF(rval);
+        return NULL;
+    }
+    return rval;
+}
+
+static PyObject *
+_encoded_const(PyObject *obj)
+{
+    /* Return the JSON string representation of None, True, False
+       Each literal is interned once and cached in a function-local static;
+       callers receive a new reference.
+       Returns NULL with an exception set if the literal cannot be created
+       or if obj is not one of the three constants (ValueError). */
+    if (obj == Py_None) {
+        static PyObject *s_null = NULL;
+        if (s_null == NULL) {
+            s_null = PyString_InternFromString("null");
+            /* creation can fail; Py_INCREF on NULL would crash */
+            if (s_null == NULL)
+                return NULL;
+        }
+        Py_INCREF(s_null);
+        return s_null;
+    }
+    else if (obj == Py_True) {
+        static PyObject *s_true = NULL;
+        if (s_true == NULL) {
+            s_true = PyString_InternFromString("true");
+            if (s_true == NULL)
+                return NULL;
+        }
+        Py_INCREF(s_true);
+        return s_true;
+    }
+    else if (obj == Py_False) {
+        static PyObject *s_false = NULL;
+        if (s_false == NULL) {
+            s_false = PyString_InternFromString("false");
+            if (s_false == NULL)
+                return NULL;
+        }
+        Py_INCREF(s_false);
+        return s_false;
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError, "not a const");
+        return NULL;
+    }
+}
+
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj)
+{
+    /* Return the JSON representation of a PyFloat */
+    double i = PyFloat_AS_DOUBLE(obj);
+    if (!Py_IS_FINITE(i)) {
+        if (!s->allow_nan) {
+            PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
+            return NULL;
+        }
+        if (i > 0) {
+            return PyString_FromString("Infinity");
+        }
+        else if (i < 0) {
+            return PyString_FromString("-Infinity");
+        }
+        else {
+            return
PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + 
PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + 
newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast 
= NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ 
+ /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ 
+ encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column 
{2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
+ + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + # Content is contains zero or more unescaped string characters + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows + if terminator == '"': + break + elif terminator != '\\': + if strict: + msg = "Invalid control character %r at" % (terminator,) + #msg = "Invalid control character {0!r} at".format(terminator) + raise ValueError(errmsg(msg, s, end)) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + # If not a unicode escape sequence, must be in the lookup table + if esc != 'u': + try: + char = _b[esc] + except KeyError: + msg = "Invalid \\escape: " + repr(esc) + raise ValueError(errmsg(msg, s, end)) + end += 1 + else: + # Unicode escape sequence + esc = s[end + 1:end + 5] + next_end = end + 5 + if len(esc) != 4: + msg = "Invalid \\uXXXX escape" + raise ValueError(errmsg(msg, s, end)) + uni = int(esc, 16) + # Check for surrogate pair on UCS-4 systems + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError(errmsg(msg, s, end)) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError(errmsg(msg, s, end)) + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + char = unichr(uni) + end = next_end + # Append the unescaped 
character + _append(char) + return u''.join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + +def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + pairs = {} + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty + nextchar = s[end:end + 1] + # Normally we expect nextchar == '"' + if nextchar != '"': + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + elif nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + while True: + key, end = scanstring(s, end, encoding, strict) + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". + if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + + if object_hook is not None: 
+ pairs = object_hook(pairs) + return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. 
+ + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. 
+ + """ + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.scan_once = make_scanner(self) + + def decode(self, s, _w=WHITESPACE.match): + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. 
+ + """ + try: + obj, end = self.scan_once(s, idx) + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ae9b3591ee9c6400d5cd09eb0a05999ef680bdc GIT binary patch literal 11292 zcmcIq&2tmkcE2sjACi&37_iM}!weOf0h^grg(*VjJp*RO4@U6XFwFBak=p8(-B{|D zyIWwr$eTbG*{8C4o2;@-Ro*I9S){Vde~_xAD%-5H%x3cYo!i}#{XArtI09eY&vVZ` z=iKvk^}GKnmp=LJZ`T?s`IPbfbNogBXei|K(wGQ9BBIomM%g zsO}crSyW1_%%C!%?rxlw9p;pSt)|pl;4v>toH8a%Vx}{N$}}rnGRq2~eZplA;DT$P?H3UqU?swTMdF{uF3{A%N4Zo6Ljgj~FYVy)e2 zxoy{v?br)^GiaK>SbtJ|;5Gus4NKnl_*5(4wu(=oXxqDPEo#`l?}oa!L_p|>;?VQA z;|Vl}0Epz|(Km*_`<&WE0Xgwd=G0q59RM<#;7(q>%B$d(GWr-RC=>F<|0C)z@@hf% zT|U$|hqb<;j5x2n0)+sRz^%=kIsn|9>Km$$GN&e*c^sK2RAUq6?S~J)xHa@ad#H8m z762->rtLSq#v6BQIr7NLS5e?EYplYhPA{GUj9Rl5*l{BOldw-TgRY2tQ^u4rZQL^E zjRwzEz#k7Ld|F2Z^dz6;R)d&tag+0Ej+E8x%Bi3Pc7fP=<)4!pmtg+Tl6ef04SCh1 z;Bk{`M1d*+w#>$HxqZyN8$s9C)PguU?!L?y(l|jwzB@rA)N6UZ>j%lAmLHrj-6$>G zq7k&tqnglb!6+N0+=9{IG01F6hR|ytl^7%wnM=2d3=!+kmbMrmx%$r;+T(Zvcpu)w#cPpfUu%#VsTeE0br_&zScdU=tb0+O+ zwqv;=7)iKC@}UJxPn?0goo*NcuTM~mCQ+!|8KayZg}k3Z84>rJYq&nX7x!YKk~Mfo zt9CUa>XoJIKU=K)`s(K9=Kif)H#he`Yrb6UQi88SbD@;_rpWZB)R(1yS?c3b-;(+% zsW+uwlDaSTveZMV7o{FYybRsAdGm_~4??0+wTh_y9N6&&&Wlvx24V>=B_(KECBg!Q zdH^wzt2hrS+W+qyg-rwu(s@u+{bCB?f~ZjN6r#o%JqYVKq5le1X(r{!7;2OlVKOq_)Dg+L}Q~!zXDTobN zK?9~>5%&c+Ptby$Jvc=<1Csqy=`m6y_pTU5AHXMww6XVxvRiL5nYM6HRxkWdRRN+$ z@8Ys}7aG>Vf$vh2n2s90sky`LJ>5635^Tj)KDu zsJ0|4q_Et$8~T`Q#$EU$38fNToaEqg1TG(@=3pnGq(B=`6J|nFbO{ayct>mZ+y}aC z&$mh2lz${OTu~6hGC7g6H)Z--LIrq2hfii+1IJ_0yj^jR(wUo@_IFHZK=>*&dtwu^ z@$Pr{BpmXlIY<2~itqo8#rK%~G5QYA-qdTNF1WT0%*~Ds=Z^kHL^o)&>rvCgIf7eE zFU?c`j)GQzzO$-U#LUj28gs)3s}EP6KYmtQt=@m~V6FOmr&K z1z+ZDhe5X!$s8~8#5ZiX(Bs(0_vrr1&>Fok5iqfkkKCSakQAZY3B~xg5I^M6;rX#e zJ1Q&N_I?UTkO$D6P&|iMf#+AmPY_OOl(Z{=eHlh@0r?5fnKY?w>94j7GWD(LvU zf`JqC&lys8dJwn3>uA8N%WPOF2>M&B$I7GY#i#%V0z_SKGiK~6RC`}n{U;Yq&vQOGBg2ROmu*XPY} 
z`pP(hQ}36_oP~r=cd=E5v0k1IT^z!K)E*||-rs1*FZ|PRRe&{oo-uJ%#0BXsFd|8? z(ZTX5o>sd^;LyND(&Pl*hf&v_4EzLe1L>09y%cZF@&x-ocn^xnVG_SkQa=fzN$O`@ zd`5A_1{BC7K^(>MHW>s4U93jDDF;Cd)9I<#I2#}sEUCU<0mHVAfmBY-S4;! zF^nCg5RjednZDh2WyT#bYJ(YfV6IzUo6bN)8#5RQwq5}W0$nIjD(@RdR^Aw_tWcwi zZnv(pM(ZP5^jX+-EiSf}*hST{9tWK@x*kkISwy$OZqfQnx2)T!TMS(#e1x;)*8+G# zl5Nr$)pmklR~v~pqPuHvBI}R1Oza)40?llcDS6TORPwuSPmBf}q^^6y%ANL??%orj z+Fzj+?W4%6qA{AEfzB;M51+yJENUk8yc#pk7Z9H!Mm2M#T;7;L?~HK`T6zSn1$;2XoBhJrE{ zU{GwdGDRj*w1xs20}WMR(g^k;nVc$Dt)XC1g9L~P{pJwRs8_)p&_dJA0qhPKPRew?=PY1weHrc9v#MaEM7nz1GMM{ zI#pUX$kly$?A5j&Vb*&tHJt%{sTWy$%;Fms*&UZ#U7?s+RTfWJ{E~&mVsI&mkKDS* zdd88vEWSl?!ncrl9^S&0FtmIBfkt!>gYx;Eae)2 zA|KL(AfpE;T)ade+(ijr1WXfI~(x*y^F)w7aeNjt21Q&BLFgZH|5_TlFn} z`bj<_ZT~cweix~f?9(@^NLFV}seA~YBFbwqavxCucH?b_FDXuB*6YX;*XuV>Npr_+ zF+o$WXSwBi-LxZtfs}_AA~7F^Zqx~U#=4j&jKL(W3~!ikb`UBvGZS><$a5q&qi?Bfdh}-}lO{DA$yzLN^s0m-IUPq!zB6*w&i3CZ~ zAX~+ZPdZ|DBPPTGin<$iLKo!qd!PocSG_LOBA>KUh!R!jP;13IAWfREFe{QJAJS%v zOQs24?bwm&2N@i<*@WW?(jv>1sla+Yxw}9!*(jkdx2|UCy9RPKNF%g*X5gbpkH7;V zxhGjMGwQ;rj&KU323OYaudSJHf^hdnhJe-@;AztgK!#p92gIvgK`dL4{-Mqa^RCrT;95r4d< zR5!X&9JDh)j*FL0582P(B*v=ZE^RMSLO|M15XOS*P3!4>v(W;b%pFJ$F3{p3;&f~< zK|gr@c;aZ@WSAT#j#isM14l?Gv}E4PjAtT_k%{!IhZ_tz0nQmLSzUp8uVE92=5Yi8 zVL)>&6^@OU_+AGT4IvFUi%#rP9+{Vxo`tJ)*iez-Tj8!kzZWk`g%!TT9~Un0c|b_hT4S+2k+u;=sH8fo@9b0IDQJ%D`Bh< zK?38Un_zv;Vfw+k6NHnLXzl)!>iV;l>a%rOQLAmcF@pYftp+`xPEM&O0l4*okoZ#& z4M}Sb3qJ(nGg#UyUYCUULsK>B7z5~-Kz@)R34zFMHFb>7V_2IkUb3jOVBCMg5dkw3 zwHn`jYqcu~fg`?Ct4l@XTBjwq${PPQm?2>DfQD9#?4O|mlZUJ&U%6oZU{_I*hfYMY zZ)p<^#;kdD023>t2=uu92$O$m4k;^{)z=Lfr@^E zLcg%-EF1q}A(4zJ_ zD3Ow1@tl3o0Jt#FTir*LVN!MJU&gR6wBOXUy42y~XXqOQi89YZl#~R7j1L7a1WM+g zEXOVK?-%4WQBxz=CI5ZRkRxPFX4EB2{u-6ug#n+z#v~IVTz%nRM!XH?ArpbOZhXK` z$U?#@$y$ zJN9cRI|`HU23D-lpJzf>%;Ujj5?O_+Bu69@S*tMz3w2Aetkv3q(`~Uyajn%r@mfu?p*qik z)PjCmd-3%eemVAZ<^HPZ{5F2vwId>Kam&Feq;?WNBeN*P`Sd8G7e;eMW41CiA-^NE zc8M2fgswdQSv*b5XUf5E(w3OD#jgfM|Hss~Lt`~Ku&#sG$brph3_Xd7Wx)>>;RDL! 
zqmU$_5!~E_Uf-@-_nxe;ikbTcFQRLY*Q%@XVEEw4vz5n>g;Wn8uY9#`@kenMACP7E zX%NALOSS55qbk2~(yyg|%1Pu#BWQQLmQFdZKeO=AdjIR^)km`1y_Nfq)*r8|e{GTA znpbVszGBfrA?9?zwHhFZCDn$LdM7cYq~iYr|Dtigy{pCROHoL7(J_jbKm5y0> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii + +class JSONEncoder(object): + """Extensible JSON encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + + """ + item_separator = ', ' + key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None): + """Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is false, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is true, the output is guaranteed to be str + objects with all incoming unicode characters escaped. 
If + ensure_ascii is false, the output will be unicode object. + + If check_circular is true, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is true, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is true, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a non-negative integer, then JSON array + elements and object members will be pretty-printed with that + indent level. An indent level of 0 will only insert newlines. + None is the most compact representation. + + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. 
+ + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. 
+ + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. + + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in 
markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for 
these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, (int, long)): + key = str(key) + elif _skipkeys: + continue + else: + raise TypeError("key " + repr(key) + " is not a string") + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, basestring): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, (int, long)): + yield str(value) + elif isinstance(value, float): + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, basestring): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, 
_current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] + + return _iterencode diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e59d372a7ab88749c287a7a2a77dfad41ebd616b GIT binary patch literal 13938 zcmcgz%WoVu6UVGPCtt?yCBhuPSIZfHJC2EJw=^C=d zVfU!JMb#3`xaN zKxe3_sjlZ&Rp0mf9@YBQzmHU2|K^)Jb(MZjryf^SIHD|k&>9s%L0O-t?Mu>rKs^|IUi(4QUR%m9E3Z#| zjp-^Xee9RVq3m%$9*5PcQYCLt6}+;#Qt~Ru3^6l|%m_21$c!;Fj?6J;jw5q|nF(Z0 zsw)ph)aF}Ck1B7HW8Qf5ntCv%^oa7ul|H8Q7)!k=W*U~7{2EJo@^9+CKCZk|Lc|HB zC)9(J$~&!|VY4QccSbVkQc^AKj-FBvipqOQ=~EoSdzsZ=EA_>L(@LLF8Wk@q{fg4x zQy5jhsjS#LGY=BjivMeT=BqeFl}sVgQeInjV;v`vAFR~exbFL=!7v)EyKA~;T1D|G6g*tq zym)bLar3Rl;^Oq;@?v8#Sd12vNAVacs}E85X#3$W7VWv;dNdOcnGToOq0x907x28e z7<2kX^Dv$8VcI+_OUltJd}MK!^Hg@xc&W$?`$Rm$iAh2 z%uk(DaOx7i*YSacf#GCtmF18)Ll(Ezkd?^3 zw3kgeR~S3o?IWg$yLEs)r#TysQI*Pt{9L&y*IBcK#OyHG3I`d^C@U!_zbFTbO)sg( zWfh)K77w8>J%k;l0!tB}QidohbWi$~KhWU`9M+UqBwKK{_!^RoEdu-6!u30M=E9)4 z^+GHXbEtOGn7brP;S;~YEPsp)i4;b~aP}fExl1SEa``SY7(|flrfqyWZcBA@g!iCOODgBW zJ`VR)WJ=cjU5L~f%+T6O;rsyTzlqwIt5c;ntP@swJRy8{X^%SV_~Ds+h;O`zL_@(r zvXFvxpq{}-qUfRfb$|8`qBbc1P>W1SJu6e|gZZAJBUF38WT4v7azH)9I77-YycWZ1 zP&YRd9mM`}QyU4pvYI5V%NH&X$Ige*%2hiBGrJK+8610V;u4eFb>O#-&l_M~Xt!Em zwb)KpHCh8MH=y|JaQU&Wqtp#NJK1XK*k0bUJ>78I&E&G_`R3fg&;GJE1^l-&5~vB% z&$gjFd$hNu<(uyv{Om8w#`OGpl6RNPc1m_unvQvp%(>^$+4SRN)=t{c3F#o)wi`w6 zRyJft=L?L9EuXd9flod+!)&LjF&p&7>GT7T*$jh~S-a5;UF^fQ9kkb%b@YE9^Ip`> z7M%{VZG&67%3hq#TW&M%8UW*N+l`*G7ax<*k*(7Fa_!A#r|d<>6t=q;+L|H-USIVg z3L(szdbkq!UurvqU__Zb%-V5#x!!bRBN<{m(7d!=8E0wfd@3W=nWZIrUBeFY>)M(z0X~hZcz?g`sSiE56(GzjPteGLn%$5#Oby5M%ibyA{SF^RtGg4If+UCw9CV zwws<^cd-Q&;<(+?kgU10vBTbJZXG7NPerYf(X41A~2rVt6M^COMB^|rAxp90qn+SA<-0GS%n?8 zlU7?6veI@VH%PPwtB3Y7227%S_6+n5!-AcBI7I7QR|a48RdlZ>Fs^MK*Ikm(L;rpf zIy;%sumOvl^JXKOIeQmUU)A-sTHTN8?WP;`EG4iB(ebQJ^v4hNQ@tH0;d-j_yhB!F zW|UE}Zn8%{cBC6R!tp`Dd2PCUowH2Bd?>g$PesbX5AYlUoCgu29mVu2s~f964rwD5 
z!!~5g4eYy5bp&kQ5CMaD@sRkrOFW_M&Vo72@d{;j0_QTe#p&qX4hVL*Q426;_n`*% zygNT@-!b%TnXQ=19>Mdn>U>&b5O}s5%bYm2dxW}Wz3M*ou_MTnS1C7k+|qR)4w69( zut2NngK1{-!Ilj()gg2^4epGv%z7AO1ox@CP>=jpV$+fKGE>ALrZM*-2%p)Fp8bHV zW$fYFpSjJpK__ymA>VA5XuSR63mc}E$#2g1fdFF4{c!%tG&nIg^0CwVcPPdXAQyHH z#`&qy1lScEXQSN|u8H&t3@17B1Q7yJ6o%vT)rjDmP0S#jGsb}DfNamrgdBS|_{Oz8 zcWW*Q=kUY5^7pOD4?J!yXmD{h!CatMa3W6u`Vin14Nwt;lG(&a$*IsL=wY@zRVC}1 z=0Uz}8$6vPTXQWiJ(n#9Nb2)J(&?M@nZYjC$ooQ&lrWZZ(d?mkda(X=l-PmZX!-%I z%j|qbU1JSYO~N3u1u)%uBL}@KpK_q~0Du}B<^tEjxJJ`q8(q6fOswUVvplrU<4iui z*mA<{IHve6=&&{wlv{!U;kVfupK2$Hk_Ey-=C^F=3aa0-f+>G#S(+7CE zzzP5a7^a1A!gH6IY9({pjdmc63W8uJfHLw=vx#FOc%?9%(qoT*o@WlwqzT+VCw`%wFUZfNwz(8gUhKS#gK$?t%2KJ%tZCcd3ik z`6}HxaZxj!D?Vtq0!v2Gq*oUubW3wQmM6l7Adk4SMns&G9Gu;IxilF;Q4ga^s41mW zsL57R6<43Ga;%#Tht8_=5|fviyuyS5PUlr5;(+I(5Ie82g2ADq4GRc;wHno5tp-Oo z4G)|zTb;iVYtkCQU!4XdzeRknvn^MEXq1WBATk5{Z3;iXgB(M)Y!|a-K`Ir6Da|wP{R_k#U&72DfE$O=#z)a62}Q$`&BrKK_y&^+Jz^-jVHKG5)fU; zTEVFj(F1sVZCQwSk_x0l3P6Y;Cj!Q;adaGFDJLnKxNuSVBWY0@{rxj6$?3TiFbKa- z!_#8O?*@8Z8jOkrOGikuO2BsHUV`9DWH1YHj@tAgj?id~yjpLse;ijP=ZRz-RYMQgAi$W0K1Xjgw-SRc!Hrj>nX!tA#)qR#G zd^R{$ev`=AB`RH-XTPeTwiD)oA+=AOI!sz!_E!u!hp3VDtvIZ&>E!%IgdSBHZ00aY z5)L7k@GF-!bI1@(1Y)rr*$tCi#Slr%4HLEtO$bnOnKv&cfQX-wH%gf)6X~GoR0|`2 zCA)xie#i#+9^u8`ASW@x5&aYLa)+uQ2PdpLv4iJID3^5-HLwEwBTIwjVbXw8gP#aH zf(L=sQVi}!@w6!ho~OkS5EKugSOQ?&3G7Mh47d>YTqQA;WP?&WNszk_+eJ#|<(dT{ z0I|w4w?8hUDh~q0+@ZU$xPiQ|tyd7>mtgTzy4K*SG(IH?H23l^+eUp7&^kW2!;KcU z7OWBjR3NQJUKL1#Vb~CJwgyL)d2UpKxaxOsMkWN1ME{H_!Zt!?Kl6Pt+`yW_~2_FXbZEOm0scYc6J*|c>xT7%=|{FwDuSxUsMb@jyK(Q&#% zA^kgi<2N{NrD9bI{n$aK61Ue{vF5PWT9xC#P66jU*#>GY)TE`sC8Yd$%fTx z>!H_f@{y~n)x5A?tC@}9mI$B7O@MPAk4A@bTCtX#cbFqN9X^rVQQYJ^G!$>)8&ie% zzua$`zp{1mQhB0$rq0(s_+>i+Sp2U@cJZ&Ec8hAaq;`kYZok?cP`iU_x2$$6YPV1A zj;P&HwR=qM9#^|3)b51ZJ*jpl)$Wwq#Vag$)x}c3tJLqI3#zQH(a<*^4YQa5F~x8; z=6Khz-D94jo5V3O@BctvPTSaX#=*NuMH%Om+B*d-p^(mtnM#50$Myr)KxRsU309vrkvHJ6IyDolA6_`4>9eD-f7c@VtHpxu;3obia3h2-Ep;t z7q0Twdswlt_@QlZiSjmvqFgvgi$`Kr2-uid3dOq#L!26*hS)t!^#nSL 
zA=WS%Xm8c&`LXXB5~>1%6x{5QEbL zsvoV4QaXX!&fg?VGMHtBKDa3+H)gRlTU4O1`tU2TMqbNd?KYU;P*@wz*qg8hArZSP zg|*=v*6>Q>eprJ{bzlu_56jKqzzZ;Ln3X8yk0m1>18mI?XAGRmn zxD5UR%QR)^g)(9RND;6>m*9^uBsv&KqiG8d3B^06ctVhl#qfPl+>l}}9H7Z!00@fj z=93hNyD3<4s>!aY@Kze&J_JHP`4<(u1nU>jftR}ky`WsCypb$Y2nZKUpAgHrAG4MS zHNYw%99ilrE(}XHi^V%%tOwq+3Nu*9r<9h3(!4ZXElUOZOj|%CSH)7|8b?Sc=w1}3 z!J@jRNgJ8%Wi__117&$~1V{FF`7dO)x}cl`<#-Tb*D| zq-mejQKa@uod}$WV@J3K(XN(W3jV*!)(uSjCzLG)AQ<*KR0O#(OJ%Q#&NWkyAav9A!X!83`|n^7BP$#5&1twW-nStC7(@AFvmX$9bO# zjdSM|61kNy!DxxuJf8)={q$wa3=3f7qjO*61g@A^*Hv07@jWE{)-WPu6L4RK7;)rp mVGM#ZW|bfaoxh>Ni0Ut#Ee>1#g>t!kdZaM<*Njmf_V?dj3=-%7 literal 0 HcmV?d00001 diff --git a/simplejson/scanner.py b/simplejson/scanner.py new file mode 100644 index 00000000..adbc6ec9 --- /dev/null +++ b/simplejson/scanner.py @@ -0,0 +1,65 @@ +"""JSON token scanner +""" +import re +try: + from simplejson._speedups import make_scanner as c_make_scanner +except ImportError: + c_make_scanner = None + +__all__ = ['make_scanner'] + +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) + +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + encoding = context.encoding + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook + + def _scan_once(string, idx): + try: + nextchar = string[idx] + except IndexError: + raise StopIteration + + if nextchar == '"': + return parse_string(string, idx + 1, encoding, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 
'true': + return True, idx + 4 + elif nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 + + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() + if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise StopIteration + + return _scan_once + +make_scanner = c_make_scanner or py_make_scanner diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30d94445f0a0c941ee46b6c4fa3bd255e662f6ef GIT binary patch literal 2340 zcmb_dUvC>l5T8B&cj6S1v}wr$X-SKd%5~uhP(>h$k&IGTvRgn&D(iB$PA@s%o$fXz z5y=AtVOzZO)Mvf`?|cM43}2x9X3kDsMZDG4&dtuw@3*rvb9;aNTPd%;dewAj{43%4 z6-F|IaEW#x6}c82DcVtVx+v2O9a-dOXeUeR``{L3b&d|p6jnX8C{7O5ZHEFAz? zAg#zNlA9ByB(hAKY@MOa3jk)x&C{>gutGXZ5r}n#b~3zmr&{2M79hUuJZY_%@JI(M ziDL(Wj?3O_{909oRWl3Gw~uspyx7K^k~N5GZKJyJ#ly4RPimh(-*ea3)~b6C_T2kx z8`WLic)nY^|9nHH4ioX1!N_1FzeAi6c|@aBQ8X%x#iCJ>OoqHHjk03N(I_WMo<=26 z3N*4rDbh%ZQle2_lroJnG|16F(ZHfXi3VjFWN83no(4pN0u74Po8grIhTRJ^EFc*c z;%PZ7ix_-l&P?(LET?l!e5UBuxkXYKLsNw@ihfaPVa_aOJ+vrXCN-4f0ET2Qq42{D zU1X^fye7qd8Sz_%UvW&&em#p)*I|i5OL5o+BD{!IPQGzefF z6A7CA^0ai@Er@-d6dG`B1h^A~DXjQEu+jvEl1#%sOJU`!uo>QM_7a9@5dw^|o5F#m zm@p#hbC9APku$HIr9^j}vIAKFOyA@i3eWG3{ zIxkRwH)e>o$Wl5#EAS>B(n%yi{GG-=Cow+6Js|1g*nk3Pu^1&KZ#*i zWPmpauP=+>*!RU|WNPALEz=&74Hp(Y+fOta8&f7~dHkB9=1}c)nG}o)>uL zR9tHw-){+v+GIg47gH8jSD!TEIE+mN(~b$FrqQu&yfBbpT4A6?dCF07DBnmZd1wc5 zcpN1Xg$~@R?9cYZ#9nY9cF#SLkOF;ToELU1A@vPkZeC#YfsTc|7u!zCa}voj)=8Cb zBLVBc30-F7Lqv9*=q|v9*V9?g4{c*6TRYQBb{yNM<4Y0|bc5smJ~m}+xPPb}(|r+! 
zM`rGl%L#+T*r4ZICZ$guC0}yOcEiBYQ|sw@tMI2}1ET&c(Q#5g@?y{T!9(P#W zX0gKJ9*esW>9>Z5Q%*a@zrvmZWcT(e3tKW%O|TMg;kttU^v-rjDngP~o6NYe`? n)!s$2k|Nk1^+WgA*I)9HlN%6uHH!vYlm;tVdFUTlrBD9@kb}!4 literal 0 HcmV?d00001 diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py new file mode 100644 index 00000000..17c97963 --- /dev/null +++ b/simplejson/tests/__init__.py @@ -0,0 +1,23 @@ +import unittest +import doctest + +def additional_tests(): + import simplejson + import simplejson.encoder + import simplejson.decoder + suite = unittest.TestSuite() + for mod in (simplejson, simplejson.encoder, simplejson.decoder): + suite.addTest(doctest.DocTestSuite(mod)) + suite.addTest(doctest.DocFileSuite('../../index.rst')) + return suite + +def main(): + suite = additional_tests() + runner = unittest.TextTestRunner() + runner.run(suite) + +if __name__ == '__main__': + import os + import sys + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + main() diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py new file mode 100644 index 00000000..af6463d6 --- /dev/null +++ b/simplejson/tests/test_check_circular.py @@ -0,0 +1,30 @@ +from unittest import TestCase +import simplejson as json + +def default_iterable(obj): + return list(obj) + +class TestCheckCircular(TestCase): + def test_circular_dict(self): + dct = {} + dct['a'] = dct + self.assertRaises(ValueError, json.dumps, dct) + + def test_circular_list(self): + lst = [] + lst.append(lst) + self.assertRaises(ValueError, json.dumps, lst) + + def test_circular_composite(self): + dct2 = {} + dct2['a'] = [] + dct2['a'].append(dct2) + self.assertRaises(ValueError, json.dumps, dct2) + + def test_circular_default(self): + json.dumps([set()], default=default_iterable) + self.assertRaises(TypeError, json.dumps, [set()]) + + def test_circular_off_default(self): + json.dumps([set()], default=default_iterable, check_circular=False) + 
self.assertRaises(TypeError, json.dumps, [set()], check_circular=False) diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py new file mode 100644 index 00000000..1cd701d4 --- /dev/null +++ b/simplejson/tests/test_decode.py @@ -0,0 +1,22 @@ +import decimal +from unittest import TestCase + +import simplejson as json + +class TestDecode(TestCase): + def test_decimal(self): + rval = json.loads('1.1', parse_float=decimal.Decimal) + self.assert_(isinstance(rval, decimal.Decimal)) + self.assertEquals(rval, decimal.Decimal('1.1')) + + def test_float(self): + rval = json.loads('1', parse_int=float) + self.assert_(isinstance(rval, float)) + self.assertEquals(rval, 1.0) + + def test_decoder_optimizations(self): + # Several optimizations were made that skip over calls to + # the whitespace regex, so this test is designed to try and + # exercise the uncommon cases. The array cases are already covered. + rval = json.loads('{ "key" : "value" , "k":"v" }') + self.assertEquals(rval, {"key":"value", "k":"v"}) diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py new file mode 100644 index 00000000..139e42bf --- /dev/null +++ b/simplejson/tests/test_default.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +import simplejson as json + +class TestDefault(TestCase): + def test_default(self): + self.assertEquals( + json.dumps(type, default=repr), + json.dumps(repr(type))) diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py new file mode 100644 index 00000000..4de37cf4 --- /dev/null +++ b/simplejson/tests/test_dump.py @@ -0,0 +1,21 @@ +from unittest import TestCase +from cStringIO import StringIO + +import simplejson as json + +class TestDump(TestCase): + def test_dump(self): + sio = StringIO() + json.dump({}, sio) + self.assertEquals(sio.getvalue(), '{}') + + def test_dumps(self): + self.assertEquals(json.dumps({}), '{}') + + def test_encode_truefalse(self): + self.assertEquals(json.dumps( + {True: False, 
False: True}, sort_keys=True), + '{"false": true, "true": false}') + self.assertEquals(json.dumps( + {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), + '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py new file mode 100644 index 00000000..7128495f --- /dev/null +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -0,0 +1,38 @@ +from unittest import TestCase + +import simplejson.encoder + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBaseStringAscii(TestCase): + def test_py_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii) + + def test_c_encode_basestring_ascii(self): + if not simplejson.encoder.c_encode_basestring_ascii: + return + self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii) + + def _test_encode_basestring_ascii(self, 
encode_basestring_ascii): + fname = encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = encode_basestring_ascii(input_string) + self.assertEquals(result, expect, + '%r != %r for %s(%r)' % (result, expect, fname, input_string)) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py new file mode 100644 index 00000000..002eea08 --- /dev/null +++ b/simplejson/tests/test_fail.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json + +# Fri Dec 30 18:57:26 2005 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '["Illegal backslash escape: \\\'"]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal 
backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except ValueError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 00000000..1a2b98a2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,15 @@ +import math +from unittest import TestCase + +import simplejson as json + +class TestFloat(TestCase): + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: + self.assertEquals(float(json.dumps(num)), num) + self.assertEquals(json.loads(json.dumps(num)), num) + + def test_ints(self): + for num in [1, 1L, 1<<32, 1<<64]: + self.assertEquals(json.dumps(num), str(num)) + self.assertEquals(int(json.dumps(num)), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 00000000..66e19b9e --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,41 @@ +from unittest import TestCase 
+ +import simplejson as json +import textwrap + +class TestIndent(TestCase): + def test_indent(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py new file mode 100644 index 00000000..c3d6302d --- /dev/null +++ b/simplejson/tests/test_pass1.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass1.json +JSON = r''' +[ + "JSON Test Pattern pass1", + {"object with 1 member":["array with 1 element"]}, + {}, + [], + -42, + true, + false, + null, + { + "integer": 1234567890, + "real": -9876.543210, + "e": 0.123456789e-12, + "E": 1.234567890E+34, + "": 23456789012E666, + "zero": 0, + "one": 1, + "space": " ", + "quote": "\"", + "backslash": "\\", + "controls": "\b\f\n\r\t", + "slash": "/ & \/", + "alpha": "abcdefghijklmnopqrstuvwyz", + "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", + "digit": "0123456789", + "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", + "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", + "true": true, + "false": false, + "null": null, + "array":[ ], + "object":{ }, + "address": "50 St. 
James Street", + "url": "http://www.JSON.org/", + "comment": "// /* */": " ", + " s p a c e d " :[1,2 , 3 + +, + +4 , 5 , 6 ,7 ], + "compact": [1,2,3,4,5,6,7], + "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", + "quotes": "" \u0022 %22 0x22 034 "", + "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" +: "A key can be any string" + }, + 0.5 ,98.6 +, +99.44 +, + +1066 + + +,"rosebud"] +''' + +class TestPass1(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) + try: + json.dumps(res, allow_nan=False) + except ValueError: + pass + else: + self.fail("23456789012E666 should be out of range") diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py new file mode 100644 index 00000000..de4ee00b --- /dev/null +++ b/simplejson/tests/test_pass2.py @@ -0,0 +1,14 @@ +from unittest import TestCase +import simplejson as json + +# from http://json.org/JSON_checker/test/pass2.json +JSON = r''' +[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] +''' + +class TestPass2(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py new file mode 100644 index 00000000..f591aba9 --- /dev/null +++ b/simplejson/tests/test_pass3.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass3.json +JSON = r''' +{ + "JSON Test Pattern pass3": { + "The outermost value": "must be an object or array.", + "In this test": "It is an object." 
+ } +} +''' + +class TestPass3(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py new file mode 100644 index 00000000..97422a66 --- /dev/null +++ b/simplejson/tests/test_recursion.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + +class JSONTestObject: + pass + + +class RecursiveJSONEncoder(json.JSONEncoder): + recurse = False + def default(self, o): + if o is JSONTestObject: + if self.recurse: + return [JSONTestObject] + else: + return 'JSONTestObject' + return json.JSONEncoder.default(o) + + +class TestRecursion(TestCase): + def test_listrecursion(self): + x = [] + x.append(x) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on list recursion") + x = [] + y = [x] + x.append(y) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on alternating list recursion") + y = [] + x = [y, y] + # ensure that the marker is cleared + json.dumps(x) + + def test_dictrecursion(self): + x = {} + x["test"] = x + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on dict recursion") + x = {} + y = {"a": x, "b": x} + # ensure that the marker is cleared + json.dumps(x) + + def test_defaultrecursion(self): + enc = RecursiveJSONEncoder() + self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"') + enc.recurse = True + try: + enc.encode(JSONTestObject) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on default recursion") diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py new file mode 100644 index 00000000..b08dec71 --- /dev/null +++ b/simplejson/tests/test_scanstring.py @@ -0,0 +1,111 @@ +import sys +import decimal +from unittest import TestCase + +import 
simplejson as json +import simplejson.decoder + +class TestScanString(TestCase): + def test_py_scanstring(self): + self._test_scanstring(simplejson.decoder.py_scanstring) + + def test_c_scanstring(self): + if not simplejson.decoder.c_scanstring: + return + self._test_scanstring(simplejson.decoder.c_scanstring) + + def _test_scanstring(self, scanstring): + self.assertEquals( + scanstring('"z\\ud834\\udd20x"', 1, None, True), + (u'z\U0001d120x', 16)) + + if sys.maxunicode == 65535: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEquals( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEquals( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEquals( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEquals( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEquals( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEquals( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEquals( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEquals( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra comma', 14)) + + self.assertEquals( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEquals( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEquals( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEquals( + scanstring('{"Numbers 
cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEquals( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEquals( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEquals( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEquals( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEquals( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEquals( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEquals( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, "xx\xff") diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 00000000..8fa0dac6 --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) 
+ self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 00000000..6f4384a5 --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,64 @@ +from unittest import TestCase + +import simplejson as json + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEquals(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEquals(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEquals(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEquals(j, '["\\u03b1\\u03a9"]') + + def test_encoding5(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u, ensure_ascii=False) + self.assertEquals(j, u'"%s"' % (u,)) + + def test_encoding6(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u], ensure_ascii=False) + self.assertEquals(j, u'["%s"]' % (u,)) + + def test_big_unicode_encode(self): + u = u'\U0001d120' + self.assertEquals(json.dumps(u), '"\\ud834\\udd20"') + self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') + + def test_big_unicode_decode(self): + u = u'z\U0001d120x' + self.assertEquals(json.loads('"' + u + '"'), u) + self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u) + + def test_unicode_decode(self): + for i in range(0, 0xd7ff): + u = unichr(i) + s = '"\\u%04x"' % (i,) + self.assertEquals(json.loads(s), u) + + def 
test_default_encoding(self): + self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), + {'a': u'\xe9'}) + + def test_unicode_preservation(self): + self.assertEquals(type(json.loads(u'""')), unicode) + self.assertEquals(type(json.loads(u'"a"')), unicode) + self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) \ No newline at end of file diff --git a/simplejson/tool.py b/simplejson/tool.py new file mode 100644 index 00000000..90443317 --- /dev/null +++ b/simplejson/tool.py @@ -0,0 +1,37 @@ +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) + +""" +import sys +import simplejson + +def main(): + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'rb') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'rb') + outfile = open(sys.argv[2], 'wb') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + try: + obj = simplejson.load(infile) + except ValueError, e: + raise SystemExit(e) + simplejson.dump(obj, outfile, sort_keys=True, indent=4) + outfile.write('\n') + + +if __name__ == '__main__': + main() diff --git a/static/ajax-loader.gif b/static/ajax-loader.gif new file mode 100644 index 0000000000000000000000000000000000000000..f16ebf7cbd4f28620c0daba2f4a36ae0196b3d4c GIT binary patch literal 10819 zcmb`NXHZjX->;L91QJksO9Fy4X^NnNiVC_BuprxlbVKhX^w84?z4u7CGf5evn_!Tr3?d(NECJ2Pu0AJ)uTvu54bx_-a^t*&`j>8i;TfD`Z)060EAmb_Eg z)wf#RL@#+W)ka%x?pW*}*_fIC+&j2FF}pLjxVO0SWp(}A+xH6{y(=4A-?w-72Szu) zd^`O1{b+b%YyaTK$DxhUsqNjpgDLv%hfiy@)VYJh9~E^o`Bf9b$IM!4PoLaT)mD=~ zFUJ4`006?j5qF#|Ok6F@g*p)Y6Z7*nj+PjJ@F5rmKRHY0xC_AA8$W@FCrO7u?zADA-VZ`x?tC|{EeZ$+PS}L}+>0Lu3YulS{ zT4!^6L+^W9RZs87mhra9j_Iz!%2vK2CI%TW6b5HzImN~dVJ!O2!OQxY?dQ)g-}ZOc zHbx@}1$YJ+?Kz}{t&;P~OkgsY^BS?Vr*pM7Zz+VGy 
zGfn8J#m3_yoX+b#a zDpjI8oCMB@qx33`Xw_VU0cm=BragGsS+$bevLq%hnO&KCcBH=3mDfvuPp-Rdj{6&R zHa8Kz$xJs|?B}2IQQUsp?)jr!!0=#iu`;qPU^}gDYqibg#q;S@SNLF|EZL!#x_2db)8GULs#w(5+~toZv=1YdVas3W&uFU4AzBgx@vEBz#;l z-pu9tZ)?LyrFGoG)7w(&W-(&|Hk`4;RUNspd=I*5q2ess%;1CmgIS<4v~VcVWaPQH zANrnVeu}B%aK0|an0w{9^-)|O9DS)D9?uwbcai9O^ScC@pw(ezOGyY?wF-n!Sm#Ke zZy3)y(6~018z!}0`0yn?_&rY+<>YT(f~^#{5m)wlacRx^dP!x6)JAFbi0ww11o>n& z{9EsPXXod0%vV-{ohI1IX%fgQW4U^o;-lR1D2o$i=iuqZ>VxzPB9Jhr87P6lwqYLL zzAmxh0lsOW7;-|8F98?s9psNE+IvPo(qi06Ws!-wc9~e{KMO$+^=Bn?AdpZ9E6B?F z1A=K_u-m%NuW@h!26eF-W1SeBa_##tKQP?qva-5}9POT49d`BGnqS=b_~meVq;S`! zgb_=kIvvv?GI&6n9GF3ujY-D@NP@D-pFf91Cy^u1oI&?!IJb}(xK)nDnXbl&f}s>; z;#;8si@QnEHk&J3gSNcYVjbRBWRIc( zvKzVb5N~!MFj(-4jL_9=*-X>+JQKFctTLloj)kA> z9G&XPDujY_f!g_$c1eN~62X^~Q5s<^MdWux&z|6ZtNq_x%Nv)pIs% z<@aqc7|CWG#B)lO27spFDxV{ zdYr!DyZ`O|uSUbIfsI>M$}+#ezuJX`W_;eY)V^^4y_63+2J*_W0U% zepJ^$z`V=Il-pRMH-O)ERX<>)`;978*j-52{u7^zX^6D`^EA)}O5Ge(0N%+LbVWZ3 z7{O(%Xc&Ih8OP#|MC*!1{kR+_8YV)k6N#2`rwV(Ym*O{JWIvY#0x~KwnK+*jILrpH zgsEGa3u@KiQ+VRWO+tOg#nRJmpN$nKR4-XirTUoGoH4@A*S|=2kmW7Na^0?<&XIR7 zOh~8kdrdpif%1ohd7+!_eVX}T-u z5(Z#qmf?p!n&=XXnBierie$(vu4-qu#{&x`ScFc29Zax*vO;;4wG^3Ly>pi2dQBUe zbn0P&ztrG*zPEG!kCk45uPjJ2c}{EBa&mFKYwyEbh)Y9wN5z&wFSW8F5PKN_dY3b9 z;`7C$R?X?Lu(C_}IaUiLI}_}`M=#WHpaA=gENGdp>e$%g{UTdV>#4Ev@4p1+=Eh|Y zro1mdCCr%)mL1?cSM=v5Lq9Hsj%wJ?9j;^ro=bC<80f*9MQpkSM;v zL4@!y?`T^T#xa7`-zO=YObN=sCp&rsK!P*uQ*+WYZ3q>q-g%jJaSkZK)98~=ryUBy z2k%0_Pmlq!a)q!m4GqDWdi$e#2Zx7ykyeufGs7^bb-%U6(h%L%cXMWRWg-Z<_=RaD z#O82mXO+I86OmbWX%C*m%p0DiNZOz0kJ1LV0C>PqrE_SsDg%>{>IpYgh~2Rqg2z5A zl6$I}Fc>=JX*Y)`iIH17<*mx=<%c2iX8DHQdzM=1%E1)FU8;FN(#6;jGWN^Mz6`Zg z&<)Aqf{`qahF9kBr`Ecq!X-Ie#WAW3V6YsIbQW-t+~{$LdQAb4@DSW*OFC&20rFy& z2lu6B?cSk=GAiD6KiHjZ_Y_mQlBneTpfipWpo2P4u;}B|Sh(zvl(O1bEWwtawu#vs zDoxa*pm3GTgXO|jcs36RP#Groh?);nb#dXA(O`Z1h~(A`%ekL<(?k`t@zKbxR*vAa ze{vyYUvs+mw~hW<#_t);DWKTB-sc_>zRKTrezuR!80~y^%k=lqqYE7S#8-7r8!^k* zu@c*hu!cZ3ZM2|$xRF&72sAaU>RIVf`2g2N9%~6ACNs@m=ChA#32CY?oRJsOQZWma 
zc%1tn;!=!N3wm^FY!rGnMkz1+0{;Ffh|Fz;WC!gr9iD1 zMbiLDHYx$kZmpCK5pN38^Ko=!}~{zBHD+P|(rL2v)lL1JiZcvF1V8IYn36ul$2* zPXmQA!6NM!`ns)A&3bQWMdGq~84qeH)gynuzVDvLTL$P*=d`W^bhACum14kqbeDh~ zp_|ifzbq4R#1?r$S+!ur`v(`>1hn)7e!bfAk@|=bIq{*e2U%I=tP_;ClNTT5apGYplxKjaubZEPcYqHo($AKNbHXsNM!Sbn zpuPlK5683&f*%7Glan49m06IMkdz#S^YTh8!&k>)Q5LLLGDaR42)y$IEGJ~K4n6_P zAQODB*FVbU)6jIUwO{nq^e7bh(SKoaEM{_GXoP8VZhQJOYKy0duT_T$K{jZrJ6 zfw0{Fa!$S6Q1uq;)Ke8XJcRIsl`4m)RxJ=gjG{p(&pB06gdp>A&;ucY1u;hA@7ub& z=)8wX@>Vws`(G7+)8uU?3}IYiq!ez$5SjI=<^Du*7r}U{i|A0&xq$EHhYH0#xytqD zzomRr?TY89xoC~YI5bFd*^bjJG7If(LTG|fBTfvJ%7Y=zzH|H1k()r@ou!h> z^c2FBs_5;*M>-Ul@f1okzm#+hBjTc!zu%o>fp8XrgZ zb5XdTUO|K~2l0yP-o+uH3!TxU&-#3XzQP71v?a&Cg6fBP7-@|Vyo7}s6H2Sq;U2fEjE zkDvm0ep{%LSKY)_pu9H|@MIe5iAgYjPT~W35#u0~vye{i^DsWa#1<&+X<|=^;C5lQ zvC7R3vpc9)xY6LLX${jrkI!8K%G_r?vWb#kb?{IwdLZh)@Jt)k!<`T;XvQ36!|x(V zi?#Ip6kOet_Q75*--oAbZC)NW6`Yh@Sd?4ao}73o;S*{qpj zdE1rZ@Zr1()!az|g|P~%@v5CXO^{^wIxodG^0?|%->BW?r@++ABGReRtzteY-tnA{ zqd+k@7@MV1m^)G@97TuKCtcQDW)hApI2*O&|3fM@EF2k?v=R7oyFjHyjE2ZICx8BC z?*5$vD%_%g2AuT}riLuc< z#TDg;X0vJ@E_Km@`d`2|0+puvSj8QxKo3Mhz5?&)bGr2ku-sVsc$fL%W%-zkrwSl< zHeNm$8jpY2H@p-tP+oyKvQkx(#ybswbIwySsZ2k~MU_I6Ni5HdOqI@bAp~`ma%jS* ztH6w0g7yo-tx|--@k4~tnBoOvqrN16SW zgW4D&i&oMCH@wLn4sL>_qU_#o5H|HKzTHwb0SPzB<=lHsSRr_%;GihDqkEyKoD` zNyxD^)TZOfkwRjILv>ZLWU$1oV|{xSc@Yvw_(_QQ_@3CkI0;TezVVX)6oq#;Ew$m0 z=wiI2DSrUtSfM4bC4hdtRR%JpNSTapCqPq(L~#UJK=Y{Zsyo(kuYdH+TQb>O(< zlqymlin{i7`BlJ$PUA-{ov{8{`3uIqUq8R&tlRl^UihlR5|EJ3kg^z+A^@#CYa! 
zQj!Ntzarvlw}!R_4x<+Q!1HOj#=QEugnA$Yn--geg{2oX17fHD`T@uwL=hO5vemwv zJg>L%GEK5UEeB^VZYHSm&{;AO{9#w|p_Nwa8%ufn0T?e{l0KsD{m8u^$PoMRFuGvg z6TWU7NL!IC%8XH$4Ek7h#N%{^M-Nyur|B`4J52DfD&f4QR9u=b&sS>JR<6HkIi!Kz zi2hR4$qW71Y*l)k+c|f%QPjA1?@azVYkdItQJ>N@S!yDA0?M~)vk=tW7X*wJ+SWaP zFHTE(KDs-)c(nXhvs!U|P<_IALv`qAO7q3$wW8oMO_*o|du}T5T|YvpA{SWFxZTmY z?Jdd7pn=ANeD{7x5FPYjGnwIU(x!T_g81S%{qLIA8R*Bsg3v1$(!^3Qt&p!?N{gm}IRq=E{aKwWGg%&?lZQ5u8=5M0ZLcR} zml9LnJ$$VI$(s4)xf(Xi(pkxBJ^acK&NECv1`US2+@K z!jbFm1H!$Ugs;zF`-Gab#;#&WcaE2pH}SUSP_j411qlldfsuT?ph3a0;kYPrG^<-! zLVyoLN@{$P&*?08+pvf{EB~-!R7O$^-aUfi?Gj?Ij{6TvTu7|te^AoX!-VXP3x?2` zMn?OyLt)*cGvh_E(=#Leb45doOP@k*yW#8ojc!A`i&NQ!$hFVV)IruEVXA~|s)~#K zW|#=0))OED2eeX%PM9Du;|2fYD2#s`MN-vFJYgeoQSM1` z-njF^horN>V0+ImOX?}2_jzu>o{HU1d|g z@gsXTU!(Y5E4HPv-`7phyJOMpBzEqK!^h9ymf{MNLhbC$zx>ZKhm!3~hyhk0k1s}S zdg)~NTVGDrh^g{`rI4mx_sJi!U+o&iL^CVXD&MSOyl-77Z<%x&8f7-PerpP2JlH7v zbpJ+_vn!-^>vm?^^d=kx71ajXz_P@Xyh3Ef0mmd ze3o=dp2bKK#@)s`Qi16&r8%&GD^>`!)PkD}i6r12dWu}LI_n@2;uKNu^1b9`B*AwFL-4nvGGQJ0!6bA#Xq zK^qYUHL0p@cdyhD^vOm_6_EE6p)J0v2^*=DWBG(d11~rQlYT@~FKq~z>q?1KiGkWT z_EXg5!b&dpAX{3*HL7Z5ScC`w#)IyPI1W@)RuEZybk(ZiqV{qoCmM@y4=Z|*?jw6& zXAEEdkgeK=haXram+vgt!jDlU<%oXf= zjd?VxL3|vAs)Qk0@Or6x9Y<{?h(R0+fNcFrwGQF#ku92mTo50adK*xj5Z3TZKq)EY zK2^yuC{jv>?nzt0%}0NftyG?2zoUE5>$rA9$|puKgZgf<2YLVov3$A0VX#>KWBVVD zJazs1{h##nha(mLO+T4zpnt7S2>`XUNnYY0<9(Xq9~MKjgeF7<$9SOvtb;r}VNMCor~N%bb0QN13qnG$C3!*FF;$er z0!O5}1rS1^rm}!Y!W|U4;qUYmtTUqT6 zaah`#bPO5(0u3$PU+F*kzTB*uDjGgpi`^Le=7KK317RoatDME=Z9y39{{aeDGM(5F zAqYGU7qxK~xpx8-v|;e)#Sck*oTWWubg>?@YgQ?nhkGIq(`9`-SnfS;)y_O~Az~5s zY*|a0H7LhiTWEz=pva8sPY%St2SQ~`bwmG@C!F)Nbo@#xs201T)ulbUZP4W8meIzX zR8F>obJYG~=6hrcL2z)6d{AtWhcJmMjBF?wEcQAd3f>LV16tP;71XtSyej&J$4(Mf zc=l}w%dQ+f{( zPSwy0?^F8H6Ee5eE=*xxR~n_f6y_O+UKH%k4Md=|(QroNEIJV>*PmzDZDO4X)q89` z7T()cAci)&+rg5=q9S4FY$qS9l6uq*kx;lP4oHNwe@!5E)^H{d*TF05y$AXVu6<2 zf}g~IJ}?`oRcly2j+Zd2U^f=AFBT}BD6VQeo4OfXmcl4qO!Eq)9{ap6vQ0MX%&=-H z4f!Rp4r6o=XfXl-))vh!6eB01jKwV%8%NbB!eO-o8phH5EREBYE8D5L4aYTO>(rWZ 
zyUf6=6SR`3m(!zip_})DDzR;F@wq1O26Ed>D#ET6z`=x7f#8-5R-2PALXGls)fxf$ ztrlJY3WK=5OCCq5yO^KVi-&0Ge06oP}- zG~e`&j`7ueaPjq;HdRnRVF}+eEERY`{rQXbF2 zssTI7ci6=Nnu?bOc=Sj9@ z9s{y1Dpm0x((Ve_=f-!=Lc4}7CVV-ZzYCGeEe?cDdad!oRwY%8yc7Xfz$9tEJFo@H zpQV9NV3)$?;CPhy&_w^<;T+26GF>xUZjdG#4(56S4Biu?;zRB9RM;_aTcIVgtr34V z(V)-brMXy40u{8TCubSlYfl9vCQ6bPOs(Kn5^8(#tK#t=%k;u6Bb*8>vAEl7EDXai zb@mAVl!N>fQpD8)pTd`cR=Eh7U=YyFa6HLgmXgBb32c2vD4a4{IaT=a?@p?hqa-~; zU$$8)JxX8<&RSBvoRVk26(F@)l3yiJ0&b<;+l=hgk1DOmXlLYbYinJ9etfyN{+wai z2jA!6`4X2TxuEnPA}Oa78s3HJU(R(V$|Lh?CMGC zsG4<=l)!dze4Vz%ve;wm=p1anG36pPyq+Jz88yliqFE7XkTE6{`ucoKi3KitJigp= z=0@teVqPw2CIj%U`h+KSEC11wKRo&0>g0d5(EHD zUnYI16H34#y~)1*1R^UX%+Ji-BNRr7^&>=`Bp!yKM4S&984(3d4uDzN;qCH$66}Is zdd3DsRE0QsK^V;c!3l_i^>hm61Sm|1-d-d$yBgVj5_6bZQap#pMkcV;(=#n$7}&zt z(p=XGPCj`Jtc`9Au5Rxv?PY@w7eCz&$YG3}y-H*X;s3)Epb4CduP#fSs0NEgsEl5fJ6E7xf#wA@dUw=)&B*)yj_$#CwHVJ#y4QFl!8 z!IB(ny|Gtq^dG?8#5+Pb3Z28Go=Ma6U`$F}St`pA7CyGT@N@svB?=E-<lhO5|E@f1c-g)%4-6as`B$nM3B0$`o#9 zb+F`&Vnn)Q({ktR-3z}2cE4=5a%=Sa8F?)?z%P}0uuS{@+>6Mqh(LKZyVZ@gkMe@P zTvcqr#eDuV6Xvk41Va0!0#!i~O$h+I50y2lIInn~7v@OBx#xfV9KLJ?N^8= zO3Eb##*8pg@t!>BYJUImx^FMgr!0BQl9wOiqZP~tmeZUtt^EAPR+MB*a$2&@l#647 zm6etMdi^-w`K(8phTIhrK5D30>|ExHJ)F*LH{s)GCcloUGk?r`F~>0e}+8&W_xm?DZA%b zl_IKzw|2R%>>e!llzR&Xzx9i_a&XLmgCRZ1C|`d+0x5tL=w?qKhr=T6@x-`De=}%k z7#8N57?K14nQKXb#5 zPjnMOB?9Ci%zQ$BCdKWc9DVvbH8c5qNg%b1FcF z2of7wB}+xM|DUiU^q6qD7DVq$k5Uz#wB_SkoJNlB=r`Ika$cG0DML4TZ%}kAxBAwhl`K(e;;AnS&v+`X@ zdgZn62e02{+TFOabTrw%GV(`LfDG&^m4suntv6y+7hemV3=$h{lt+Q9Reh3pSoM+$ z<=5>HRuCi>3cp}B&0?`*)wj&H6uVuA_73;un>XgqI~bK0hX&;s^z8DPG75CXFZjgy zYUnbu6Yrwo(6iR;!8sSRP)<*XBUu0)4oi0L-$jEjBQtIE9)i4r6;&Da$_sm-X3#P5 zB!noH>N@nMJ`9R=ELe^kD|(H`;nsuk^6p2HiE1ehl+`TU`9zD$K^>CMlM%|nL;^EY zLb46)sfwtNfZT4*<+Fzxc}0EE`nIluet|QEpQo32aJl7hyYSi$HEGv4UkPAzeKXC@ zfQQ{6ue>CHr})+TS2P_`>$wf-10^G#G<$^B*#~F5=j1-v#5l53iNE7nvvm)zSL3YcQq`!7K5@OdZe^d>PEMBXgo8Sj2hPG z6GL8=rq~emG*JoanRhRBKVUEv8@f^wU_LL_7_0j literal 0 HcmV?d00001 diff --git a/static/favicon.ico b/static/favicon.ico 
new file mode 100644 index 0000000000000000000000000000000000000000..ad4ca66a17637746a5c33e5a1cfc46e35754fac8 GIT binary patch literal 21792 zcmeHv2UwKH^Z&l}`q2ag8^(eim7pLN>;*+)iBgRs8VhO=>2e36#%@FrrA3LTNz^C? zMVis5!2-qxh=LspNE9QSl>5!zduQcPN%`ja|Jlb6yKmXq*?DJoXJ_X`h(HX&i9lw^ zR0pBYq1?SYcl~91gciU(Gc)eGgC0V8RtPn1%3U88z{|rC($!VJPa|~L5up{14#Oov zQz1N+j2tm!9Da(yXCZU--ZcXu>CKTt22SuNN@KlNyG}GYm1E;K!?)^yTPKTIXI70F z^-UgWI-;%qFFH>@xo^Dm*I#wEMnyGESP^15Y*rhCJl^uU$&5n}va?G{O1zYr(Iwf}wglz$%P9+ZT$XV*-Ln6V zJ59|HF@_vyId}UqME4|UyB2nKQHa!a=)Y|+A{;4Q-S%=69x?X5t{u9B=x~7k&z{q>evb`!UXIQN-_%iw8 z9Odkvo%bbmujyp)N5?-motgMJGPpe8uIrlgq?i(GbmX$4m$%Cr;^1(DIYSWfqh93> zqtux>eqIK)hc2~UoL(I5u>0_$!gZ52Jesv|?!6IL61Et08`Cmr$4uM1?6CAhi7{To z$0ZabPS1(!*NZoJ3*SuM)6*lvqOj_Rn1UxufAK3lYMI?4bN-=)MFE)$bDv!ZanBkn zJUXU2Cxt4ru6S6G-EXE>TF)Yf{xPl>oJ+i(4+^%v=q>%iIwr{ZbfToyk=emDEsE~X zO7tAv*G*O%y23!`n%|s7cMK*iz4ap7>4;Z)`N4o6;+1J*g7Z#1$n^+(eEE|9ow6Q< z%CaU?nlIY>lLsjk^!(9sx*cja`qJ66`?CyczbL%up8rK^aiQXQO8A5P6@wM@?IAt} z%Lo5n=v+W=vrd^8?;GHxo9ulm&3JtUJGVks<50Y>N^YN9`t^gv;U2fmb55R2PubJ= z`mAQR){N7$Lpw&D%F6bi)U5Nds$P2!&dJKx3o86-zRtES^Ib-YTy7Sm>`VQ0k#!1H zn&q(gme;MTeTuX5OKO(gNRK`caVnzQm}1{9kRY)=GYq(zV-~> z;YNFw#LSMLar>8<<~v(mO}bDyywm3EpLNVGxV$I!(6r#2p(n2$ij2xGSn2LkR$~(# zn|Wuu?%uR(%bOhvU+~L!vDLOyPu2QP`|5b<&{CecYt8+O`=2~&clFjDePFxYs9`^J zUgKZ6WA}@qj9k;QiXN3uO3&yW4PVGpese26wVQb>UY_HcdC#6YUi0_rlu`MOWAKct zzKYVL1K6^awksd5D+xYo$r~SdxqWPWf=x5!s1b8DQ(+s#U-h@Nlp{eZNf zy&-2ty~s{G-uk@NgX0fad#4d*C+?V@J3|;}6h4~ld?o8}+O4bMvF4&kS4X4J8pH9~ zEfiiJ&mS4*pNiZZerxO87Hj)wpW=c|lnx{TGfAL{*xYoo^Nz*?sO( zof*4#7g^oQn6=N!Gh&LR;5(O&UvctqaMt<$D&U};%DXk?A%%>v6eDEfZ4HEwNv`8}d1=5T>S`Pdn=PIm!R_ z$1VFqPWY7Uxos({nU(4GE`^mYqr6Xn&^j z!BU~`9wxe*j-qqs!{p?gH9?uB3y1z0CrU5cVnLiut@X{H_T&qX=X*W6wV3U2E_0m0 z{jJZ&Rm5dW!tFA`d(S@?G;8i=okQ0hCu9UJ>d=2yMf2VVhWr?HtTr!i&H$;mU(5KS z&i8jXCAI(?tNZjDo~|*A{VEyT;L5xQFCr(+Nd4hUpI@uiD3%XQW-@~|Tg)qXc-*Mq zLhln(%Z!JQ3oOn!d2Le3h0O8ugnir!(lU~1-J@5ZTAa9GA-=gXr|PQ3*xW72D5l`c z0g~jD88cCdbRCTMWQ$iF8XsI9QnhkH)o1H>=DmntJ6+*E9Dhj^v~g}(e`Vav 
zIltY0zOwW{LB#V%R>iqq<&O_snXsx=d49SgbGqO2(FHld!&9Hktcfa5$STZlR@^e+ zyB>bI_8n_m7u7}=Ou5G1zS~xgMn!FQKKJ=PLfoc}$L)m63$NNZOue0G;h|GHDR$dm+;%%GkOve&$!h`kTr16TKXlBO~Yjt>?@3Y@1be&q5IsyzkE! zPg{8{{N_;8#a>qIqIn*QbkEcCV9M(~C`~6cDM}uD$y1K#31CxSK5NjMUX93a#b#!s z5y3Z)Ki^u8b`gBOfgL)QW$wc_L$eWijMkMecq(W?^5v#-d3S;~iwPdI9ICO6)fz?e zAJmc?%J9^Atxc-V1jC*ejU4JQWbdFa!a6`1HrMD20N!zHOG>2@HKI`ljXKb%1OE#h zP$(4Wui%A3K>{cZfEK~w1*K4Y6i9d}zqTe3Q9czOlVrmE&@Kh-fR9uWK%oFA>UQfxr(&Fp|qd zNx3|X=J=Bnb@8cfUHgFhBz~tQLZJf@Iz9lQj6Mkc0=GILq(2NHm*WUIWpKGOlqVn4 zj!8!mnwWx+!)}CZ*MZ;K73i1(a3TPl>sxjC<{y@2QA5CESrXTqpwnbexM53Wm|-(z zph0V;xq!7{`9s|GB|DTllJAuIlATIjNg@E}^%{K!wUiX zO_hM%P^}#sl>#6@RX(|Za{>{062pt3H&YomZaf-6kn3y zD=|_olo-62CvK`RqrhuV+B%8ZNF|@$SSbX)SSBEcriKO3)aWHNHA+TP_MrgzD>$xX zDAzb1voM*@tiC5;p&#t}XHd5mml@hA8dQi6B7<{}zb9$k zhjaWgU&^Ds(gkcZ^kE&y0rZG+*~;~hd*|QdAIl#nN~6Efm->>r4}@tq;QdblyRk~h zZmr=nVRs1Bv?Is2HeIZb?yF}XCx7P{t|q0K$MM{S`=AEa0}WL7;1SnXmiG7o>p#q8 z&+#{)i6M(PS}Z=bY;gLE{A0egCG|7aIH5?Q<8U?qFc&A(GSswPoSwb>mfGmld;cHg zU)?U|A18q>pIP?|<{?-=wkUZ_@MVMy+i}lc8}l%rpaKJ+KLjvYu=ssE|9`;z^S1uf zCx4E$mw!o`r52u2t#7J6tg9f%_Jg^{G^`1KEe++1qyItvp`QfFQcls_T)Gx?C?3{= zo|8DPS)oP`j7ZwDFDqQpn-v7wutK>tD_jf+(SpVBj;uE;40IFep7-I_ud2D+`c>Eb z-%$Chz7wp|VFES|#_;MgA-l7dBKyu%^Dn|WfXm%k!Q3z%W91^Lp)yEnr1S&$zXt*E ze38TuYzLM7Q1kcm&y4|^8tN2&gA=0}tU8I+*4e&e< zaCx4%$1B_7&OZ)tW=YFsy zo@2X|B)d;g^Z#-U#(G2(gb*NjA1HW-0V9zl2-co2Nz}-{9{aNa@_)I0>G9dMU;}M} zy}+g#0lVr6Nw(?7@vpJtK3@Ksd3VGs`@cT>`6Kw(*u*OPfh5U}BRJWE9nFiag+0y* z2-Y&@I9Bx$S0L4(Ab&5Qu4=8uG+_Vo-G|GcR?XFTO?qXoY3#Pwb4;1D^%#1(%6=2D zYs!VZ1hAneZNg7MUH;PA3;l$D2z{XZKgNCrx_F;}h1$Xr@1X^-9|jv5_F)^IkVH2J z{7l0?2!g`8{4v1ege3?Gi~}C%|IYWz-!+btH;ebMU^-#hV*61ATn6C%Er!O%=IgNZ z{7h_na^p5m$4eLHoV>hh~w1fl;4 z-fQtyd#?zEkllo3|0hXw8o||h#XrbVSD&Q18hjkOG$E;OP_MHCU%E%bzxMeb%OCFz z1rTG2=WHxPo>&dI7U&R!PBW4)?M{=nU-D?5OqDN%-3WW%h)N+ZPDxV^Uvs>_w3$^M zZb!4@W{3^Wd43$u;yTmC2B#N@o0Q@8*GFPd{VxAlcALR}5|YPI6C?0Go}ng3!QOl; z_DQG#W%unY?Ue%i!yMpOHOC8h>tW6WpAg>n(r()k;zQpx`*zjXQ}N%G5qJbijcWp= 
z##I2ESAQr1;awl8;fwhaqZbP#1{Jf!O`m}O!4mvelf>Y&;DqByXj)&Z!g;Lf) zENRk0xwJ`3Wq6Zj%Ebm|%8o*|5B47#sQiZsNP_8 zaNdtXzV&35<*EFXEGwX){En{^dSe3)*T{wfoB@#?5ZVNYXp7JWpw|L|eg`5n_7LD8 zLXMDQsIIl-4#^1F??q@JjPovwVSTa(I-oDAgk`Q?H@G=leP5M@fd4#57Sdn@L)}-9 zAxuvuUnY{v)-`(z&EZ-L)|$#?EA+6>Tqg4Y_yIz&|C|Eqnzp z8s*Zc1An6qyuOw-mK!DTu@V5g3z-4z0EDUpg*sB>I#;K?^RLfkYJ2H@@hzH|8XOM; z@2cz|ZSgeq#?1Un-yPeE*lv8UJnM=fdHdd~&`{IJuq8cLhlV!C&qC!J+lI=wb`6zp z>GCF`I?3hh;hg}mxBbE1*4Wx$!>i+J-sY)}q4CZw2v(S5hfuaLLTW#;T|C61WC4DI z7?jsA=>$TflMu364SRXDPmqhN&=7sQ{kmc}-o7_PthXwpNps~$ss2X%Oeze!q$}C3 z+TY=rl2_OT_YMH|Dy5<&xdOiBSO`mbV~_WC`x?IL%lojxCEjojF@Pt6enHP@A2|Pt zsAj0oH&SGu>95dl7ERiR@tD91LSDR*&nzz{2!qzI`-K|PzAYHMK z!S|{pi1x6rd^eHAC$>EBQQ*Bj_!_=_{f)n~eUj)gk@JtK{AX&v8Fvm&5?viIkKl8t zB#_uh!^7X(zQt!6x^L#3Pl(4X&IMm1oNx4=qIs^ZeF8e74R8*)tS_p<7q~q2DB&clOi<@JYrtqR*8M9WWc`+R17WqtD3Jkyr9M-?E zy5tY*&!D>M5JYPmSOd>ve=_*N(_Vk)ZTZ8Yi*O)MyrDig3w3|{N@Dl|*56f_7LQ&8 z=cf_X5K9?Bk=9dR0k6MfmhzZ@^W6Ho`VW$5hJSW~zo{y{-M%i%Gg@ubN?9M4<9Jr^ zOWRBJw{Z4H=tW)ecBRhSP{p(ESKA-UuM7CLE-XvD&;Ag1LnyI6SRk~k2SPtc5mK*X zgA*VQIQv~-n`{jDj(e?zDQ$?l%PM_Pm*3GpxlBeZlgo8J1WROcK4*VMnumtQ^aS(> z4UK*U`Bn?L{F_!>ytJRn-z$^JU)r9y?%VSJhps=^{>J+Wb<95AW7G?H7yEV9`;RKv zf7FHgWA^`{{{CBX{ePw%&3O3#HOGDu_|I%qzsYO#le~YQu5H}br#}=Tc$N%*K|fZq znol4hN%V4p*uyO}^%=z1k6+7B<5n}&XR9C<-=BcERzh_ijwxzlZM0HXt!E4(bex2e z%sJwwB{~Gsr>JS$MEIPRje|4t1f`I-9f0%i;kE{AT+Dx(WL*QSRqNc>bS+f!m&0slvGhrKsM`N2}6T~}Wy9Xy6L!dnYeepzr zBz1<^_yO<_^xdN|eXxz-=QR}6#jyuCW{D5+2YA0^K2*ql4}EZo;;6pt1GdRD`$HTX zgv%rg^2huG{fC@<++#TUX1%!jn(xTjg9MToK}^669PoU2ejd%jrznTN>e zmM-Wg3Gx@I^nHUEH?}nGyc}Pk z{~GaFpG zV|(!U>M~l$?uPN@7LTtrI>nXW{rj5!KjP$r=Lfu>G41m9+XT_3ACHk2aQ+@kC+<1T z7ZIe%1c6TKIDt;`c!5p|;IJAg69l?P=8E-8K|gPD`subs1pL#KTO3U~EK`AUTmio^ z_@2)!DT3b{z#QtL=ADam)FY_&GSE%X&zi-Mt5F*QWr*Xc4UjfbdWZ}$eHbQKKcm4X z76oxk_%~Jf_dIbBXS1zVz-)n7zR8<4bCIThcz@J}V#a(zG4_DbZ-O1&o@U0GQG9#w zVUPsbb3CjmYjJ!yO^pdx{l1H|52eU{vk8RfRMi*;9l_HXm$a>GjrTd5KqIIBN{$C5 
z4!$x2{Fm)EU=Jsio`D_2#~0?b(9J=>+fERztOQKRRhVb4@tLK!0JkBQGKC5`Ok_0$YZ0DrX=;c+p9#$sFkcrzY#RP;r6y_W z3-ApKMzX+F$fiR5;R0sKEugH9+f=R31fTi(HbaeFs;P|UIVj_GUW)JaWF)>*gzQlq zHz{DSPgBPfOaY)Fg`*L$XQ*Z%T{FxFLcIU0BXcNm6a+BNZY z{aMkf86y2Vu%`a*Ce%3xcHW=PLiP|%cD4szp}r8S0wk^>s^F~7hHtw5sK_I3SvMJ(*k$AS%&NzlenDI=4?{qZs&8V+N&-0fbq4vZC;=aZ>OTH$Am)Oh++xAE za{`y4VF->UBqSmrNSADa?}7aNUVXo1_ra_I`;F5TEIWu-1iPV^3x2mH#SHcZpRSk0 zupEBR3%S7`{!&w=yO5Vk6CFn3I9?tde2GVgv$f97};1{7vw=p2(ndgMi5}h z2o_i|LSL}s=UXwNfc6yp- Date: Tue, 23 Nov 2010 12:49:57 -0600 Subject: [PATCH 83/94] Change a couple of the example story URLs. --- index.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index.html b/index.html index f5736129..4ee35c4f 100644 --- a/index.html +++ b/index.html @@ -98,7 +98,7 @@
    fanfiction.net
    Use the URL of any story chapter, with or without story title such as
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or -
    http://www.fanfiction.net/s/5192986/5/. +
    http://www.fanfiction.net/s/5192986/5/.
    fictionpress.com
    Use the URL of any story chapter, such as
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or @@ -117,7 +117,7 @@
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332.
    mediaminer.org
    Use the URL of the story's chapter list, such as -
    http://www.mediaminer.org/fanfic/view_st.php/166653. +
    http://www.mediaminer.org/fanfic/view_st.php/166653. Or the story URL for one-shots, such as
    http://www.mediaminer.org/fanfic/view_st.php/167618. From 708f750bab20ac33885752321e5374cd821fc546 Mon Sep 17 00:00:00 2001 From: sigizmund Date: Wed, 24 Nov 2010 09:37:49 +0000 Subject: [PATCH 84/94] Added Mobipocket support to the Web UI --- index.html | 1 + main.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/index.html b/index.html index 4ee35c4f..ac1c0426 100644 --- a/index.html +++ b/index.html @@ -51,6 +51,7 @@ EPub HTML Plain Text + Mobi (Kindle) diff --git a/main.py b/main.py index 9a9cbf31..e124982b 100644 --- a/main.py +++ b/main.py @@ -218,6 +218,8 @@ class FanfictionDownloader(webapp.RequestHandler): writerClass = output.EPubFanficWriter elif format == 'html': writerClass = output.HTMLWriter + elif format == 'mobi': + writerClass = output.MobiWriter else: writerClass = output.TextWriter @@ -292,7 +294,9 @@ class FanfictionDownloader(webapp.RequestHandler): elif format == 'text': self.response.headers['Content-Type'] = 'application/zip' self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.txt.zip' - + elif format == 'mobi': + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' + self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' self.response.out.write(data) def toPercentDecimal(match): From ccef466090d6050681b2efea40d05e4904091b5a Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Wed, 24 Nov 2010 12:36:38 -0600 Subject: [PATCH 85/94] Change no-class div tags to p tags in mediaminer stories to get paragraph breaks. --- fanficdownloader/mediaminer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fanficdownloader/mediaminer.py b/fanficdownloader/mediaminer.py index 6a4c03a7..daa48fa6 100644 --- a/fanficdownloader/mediaminer.py +++ b/fanficdownloader/mediaminer.py @@ -346,6 +346,12 @@ class MediaMiner(FanfictionSiteAdapter): soup = bs.BeautifulSoup(data) except: raise FailedToDownload("Error downloading Chapter: %s! 
Problem decoding page!" % url) + + # convert div's to p's. mediaminer uses div with a + # margin for paragraphs. + divlist = soup.findAll('div', {'class' : None}) + for tag in divlist: + tag.name='p'; nvs = bs.NavigableString('') sst='' From 719aed32a98e1c23f7ea56aad36446f646fc4164 Mon Sep 17 00:00:00 2001 From: sigizmund Date: Fri, 26 Nov 2010 15:55:04 +0000 Subject: [PATCH 86/94] Tiny static changes - preparing for doing some SEO --- index.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index.html b/index.html index ac1c0426..c084a399 100644 --- a/index.html +++ b/index.html @@ -2,7 +2,7 @@ - Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org to epub and HTML to Stanza, Kindle, Nook, Sony Reader + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza @@ -35,6 +35,7 @@

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier.

    +

    For Amazon Kindle use Mobi output, for Sony Reader, Nook and iPad use ePub

    To support new features, such as including story summaries, the URL you need to use for some sites has changed. See below for example URLs for each site.

    Or see your personal list of previously downloaded fanfics.

    From 2d7f6c2e5c64d9f354d676fdaf2b78bf5a4bfcb5 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Fri, 26 Nov 2010 19:36:53 -0600 Subject: [PATCH 87/94] Correct story status for ficwad, change story status for fictionalley to 'Unknown' (site doesn't tell us status), change mobi file output mode to 'wb'(binary) so it works on windows. --- fanficdownloader/fictionalley.py | 2 +- fanficdownloader/ficwad.py | 34 +++++++++++--------------------- fanficdownloader/output.py | 2 +- 3 files changed, 14 insertions(+), 24 deletions(-) diff --git a/fanficdownloader/fictionalley.py b/fanficdownloader/fictionalley.py index 68cd36e4..b1a32125 100644 --- a/fanficdownloader/fictionalley.py +++ b/fanficdownloader/fictionalley.py @@ -69,7 +69,7 @@ class FictionAlley(FanfictionSiteAdapter): self.numWords = 0 self.genre = '' self.category = 'Harry Potter' - self.storyStatus = 'In-Progress' + self.storyStatus = 'Unknown' # fictionalley doesn't give us in-progress/completed anywhere. self.storyRating = 'K' self.storyUserRating = '0' self.storyCharacters = [] diff --git a/fanficdownloader/ficwad.py b/fanficdownloader/ficwad.py index 058528bc..9cb353ce 100644 --- a/fanficdownloader/ficwad.py +++ b/fanficdownloader/ficwad.py @@ -111,26 +111,20 @@ class FicWad(FanfictionSiteAdapter): meta = soup.find('p', {'class' : 'meta'}) if meta is not None: - s = unicode(meta).replace('\n',' ').replace('\t','').split(' - ') - #logging.debug('meta.s=%s' % s) + logging.debug('meta.s pre=%s' % meta.__str__('utf8')) + s = re.sub('<[^>]+>','',unicode(meta)).replace('\n',' ').replace('\t','').split(' - ') + #logging.debug('meta.s post=%s' % s) for ss in s: s1 = ss.replace(' ','').split(':') - #logging.debug('meta.s.s1=%s' % s1) + #logging.debug('ss=%s' % ss) if len(s1) > 1: - s2 = re.split ('<[^>]+>', s1[0]) - #logging.debug('meta.s.s1.s2=%s' % s2) - if len(s2) > 1: - s1[0] = s2[1] skey = s1[0].strip() #logging.debug('Checking = %s' % skey) if skey == 'Category': - soup1 = bs.BeautifulStoneSoup(s1[1]) - allAs 
= soup1.findAll('a') - for a in allAs: - if self.category == 'Category': - self.category = unicode(a.string) - logging.debug('self.category=%s' % self.category) - self.addSubject(self.category) + # ficwad doesn't allow multiple categories. + self.category = unicode(s1[1]) + logging.debug('self.category=%s' % self.category) + self.addSubject(self.category) logging.debug('self.subjects=%s' % self.subjects) elif skey == 'Rating': self.storyRating = s1[1] @@ -159,14 +153,10 @@ class FicWad(FanfictionSiteAdapter): self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(s1[1].strip(' '), "%Y/%m/%d"))) logging.debug('self.storyUpdated=%s' % self.storyUpdated) else: - s3 = re.split ('<[^>]+>', s1[0]) - #logging.debug('meta.s.s1.s3=%s' % s3) - if len(s3) > 1: - s1[0] = s3[0] - s4 = s1[0].split('w') - #logging.debug('meta.s.s1.s4=%s' % s4) - if len(s4) > 1 and s4[1] == 'ords': - self.numWords = s4[0] + if ss == 'Complete' : + self.storyStatus = 'Completed' + elif ss.endswith('words'): + self.numWords=ss.replace('words','').replace(' ','') logging.debug('self.numWords=%s' % self.numWords) diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index 10fb6198..25487d6f 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -109,7 +109,7 @@ class MobiWriter(FanficWriter): if self.inmemory: self.output = StringIO.StringIO() else: - self.output = open(self.fileName, 'w') + self.output = open(self.fileName, 'wb') self.xhtmlTemplate = string.Template(html_constants.XHTML_START) self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START) From 617dc21eb99d44a284cb1d70e89911660a21903b Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Sat, 27 Nov 2010 17:32:31 -0600 Subject: [PATCH 88/94] Kludge fix for uniode in story descs in appengine. 
--- fanficdownloader/output.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index 25487d6f..b5a1ff8a 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -341,9 +341,13 @@ class EPubFanficWriter(FanficWriter): description = self.adapter.getStoryDescription() if hasattr(description, "text"): - description = unicode(description.text) - else: + description = description.text + prevalue=description + try: description = unicode(description) + except: + description=prevalue + if description is not None and len(description) > 0: description = description.replace ('\\\'', '\'').replace('\\\"', '\"') description = removeEntities(description) From cfb218f21e9eea22554d2c6701f208e282520890 Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Mon, 29 Nov 2010 18:45:47 -0600 Subject: [PATCH 89/94] Moved the check for a completed story higher in the code, this makes the continue's that are in that portion of the if statement not skip that check. 
--- fanficdownloader/ffnet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index 4caa13fa..d156f9fa 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -186,6 +186,10 @@ class FFNet(FanfictionSiteAdapter): (u1, u2, self.authorId, u3) = s2.a['href'].split('/') logging.debug('self.authorId=%s, self.authorName=%s' % (self.authorId, self.authorName)) elif l.find("Rated: 0: continue From 30455bfda2207f5681c3f1fb3b7e39249246dd3d Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 12:13:00 -0600 Subject: [PATCH 90/94] Updated the processing for fictionpress.com of the information line to correctly extract the Updated and Published dates, and the Completion status --- fanficdownloader/fpcom.py | 66 ++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py index 471c0b85..fcf4a7ac 100644 --- a/fanficdownloader/fpcom.py +++ b/fanficdownloader/fpcom.py @@ -136,6 +136,38 @@ class FPCom(FanfictionSiteAdapter): self.addSubject(subj) return True + def _processInfoLine(self, line): + have_lang = False + words = line.split(' - ') + if words is not None: + for word in words: + if word.find(': ') != -1: + sds = word.split(': ') + if sds is not None and len(sds) > 1: + if sds[0] == 'Updated': + self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) + logging.debug('self.storyUpdated=%s' % self.storyUpdated) + elif sds[0] == 'Published': + self.storyPublished = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) + logging.debug('self.storyPublished=%s' % self.storyPublished) + elif sds[0] == 'Reviews': + reviews = sds[1] + logging.debug('reviews=%s' % reviews) + elif word.find('Complete') != -1: + self.storyStatus = 'Completed' + logging.debug('self.storyStatus=%s' % 
self.storyStatus) + elif not have_lang: + have_lang = True + language = word + logging.debug('language=%s' % language) + else: + self.category = word + logging.debug('self.category=%s' % self.category) + sgs = self.category.split('/') + for sg in sgs: + self.addSubject(sg) + logging.debug('self.subjects=%s' % self.subjects) + def extractIndividualUrls(self): data = '' try: @@ -250,39 +282,9 @@ class FPCom(FanfictionSiteAdapter): self.storyRating = ss[1] logging.debug('self.storyRating=%s' % self.storyRating) if ll > 3: - ss = tdas[3].split(' - ') - if ss is not None: - lls = len(ss) - if lls > 1: - language = ss[1] - logging.debug('language=%s' % language) - if lls > 2: - self.category = ss[2] - logging.debug('self.category=%s' % self.category) - sgs = self.category.split('/') - for sg in sgs: - self.addSubject(sg) - logging.debug('self.subjects=%s' % self.subjects) - if lls > 3 and ss[3].strip() == 'Reviews:' and ll > 4: - reviews = tdas[4] - logging.debug('reviews=%s' % reviews) + self._processInfoLine (tdas[3]) if ll > 5: - ss = tdas[5].split(' - ') - if ss is not None: - lls = len(ss) - if lls > 1: - sds = ss[1].split(': ') - if sds is not None and len(sds) > 1 and sds[0] == 'Published': - self.storyPublished = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) - logging.debug('self.storyPublished=%s' % self.storyPublished) - lls = len(ss) - if lls > 2: - sds = ss[2].split(': ') - if sds is not None and len(sds) > 1 and sds[0] == 'Updated': - self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) - logging.debug('self.storyUpdated=%s' % self.storyUpdated) - - + self._processInfoLine (tdas[5]) self.authorURL = 'http://' + self.host + '/u/' + self.authorId From 2f0bb31e2487966af862566477d83ad68837e954 Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 13:56:18 -0600 Subject: [PATCH 91/94] Take out adding the id: field as a subject. 
--- fanficdownloader/fpcom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py index fcf4a7ac..ad89b37e 100644 --- a/fanficdownloader/fpcom.py +++ b/fanficdownloader/fpcom.py @@ -141,7 +141,7 @@ class FPCom(FanfictionSiteAdapter): words = line.split(' - ') if words is not None: for word in words: - if word.find(': ') != -1: + if word.find(':') != -1: sds = word.split(': ') if sds is not None and len(sds) > 1: if sds[0] == 'Updated': From f114a2438eee22034b89712ff3d088bb63445df8 Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 14:06:12 -0600 Subject: [PATCH 92/94] This one might be controversial.. Since none of the EBook readers are really for update-able stories I added tags/subjects in for Last Update Year/Month and Last Update, which at least in FBReader I can order by, and see all stories updated for a certain month, or a certain date. This will let you know to reread the story. --- fanficdownloader/output.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index b5a1ff8a..98c93827 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -337,6 +337,9 @@ class EPubFanficWriter(FanficWriter): createda = self.adapter.getStoryCreated().strftime("%Y-%m-%d %H:%M:%S") created = self.adapter.getStoryCreated().strftime("%Y-%m-%d") updated = self.adapter.getStoryUpdated().strftime("%Y-%m-%d") + updateyy = self.adapter.getStoryUpdated().strftime("%Y") + updatemm = self.adapter.getStoryUpdated().strftime("%m") + updatedd = self.adapter.getStoryUpdated().strftime("%d") calibre = self.adapter.getStoryUpdated().strftime("%Y-%m-%dT%H:%M:%S") description = self.adapter.getStoryDescription() @@ -387,6 +390,12 @@ class EPubFanficWriter(FanficWriter): i = i + 1 if (i <= 0): self._writeFile(opfFilePath, CONTENT_SUBJECT % "FanFiction") + + subj = "Last Update Year/Month: " + updateyy + "/" + updatemm + 
self._writeFile(opfFilePath, CONTENT_SUBJECT % subj) + + subj = "Last Update: " + updateyy + "/" + updatemm + "/" + updatedd + self._writeFile(opfFilePath, CONTENT_SUBJECT % subj) self._writeFile(opfFilePath, CONTENT_END_METADATA % (self.adapter.getPublisher(), self.adapter.getUUID(), self.adapter.getStoryURL(), self.adapter.getStoryURL(), self.adapter.getStoryUserRating())) # print >> opf, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName) From c6b79cb57bc42ec3a60e40fc8f24179d6592d482 Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 15:11:20 -0600 Subject: [PATCH 93/94] Changed the "Title Page" reference in the TOC and CONTENT epub files to be title_page instead. --- fanficdownloader/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index 98c93827..da7503e1 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -406,7 +406,7 @@ class EPubFanficWriter(FanficWriter): t = "Title Page" f = "title_page.xhtml" - chapterId = "Title Page" + chapterId = "title_page" self._writeFile(tocFilePath, TOC_ITEM % (chapterId, i, t, f)) self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f)) From 8f58164ddea94731be7caef9f415838919d11f47 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Thu, 2 Dec 2010 17:36:52 -0600 Subject: [PATCH 94/94] Added tag fanficdownloader-0.4 for changeset f23fd0e4cbc3