diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..6f1ccdc8 --- /dev/null +++ b/app.yaml @@ -0,0 +1,31 @@ +application: fanfictionloader +version: 2-5-5 +runtime: python +api_version: 1 + +handlers: +- url: /generate_mock_data + script: mocks/generate_mock_data.py + login: admin + +- url: /r3m0v3r + script: utils/remover.py + login: admin + +- url: /r3m0v3r + script: main.py + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + + +- url: /.* + script: main.py + diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..1d9c70a0 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,4 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 3 hours \ No newline at end of file diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..f4aec452 --- /dev/null +++ b/css/index.css @@ -0,0 +1,71 @@ +body +{ + font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 43%; + margin-left: 23%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ + margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover +{ + border: thin solid #fffeff; +} + +h1 +{ + text-decoration: none; +} + +#logpasswordtable +{ + padding: 1em; +} + +#logpassword, #logpasswordtable { + display: none; +} + +#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile +{ + margin: 1em; + padding: 1em; + border: thin dotted #fffeff; +} + +div.field +{ + margin-bottom: 0.5em; +} + +#submitbtn +{ + padding: 1em; +} + +#typelabel +{ +} + +#typeoptions +{ + margin-top: 0.5em; +} + +#error +{ + font-size: small; + color: #f00; +} \ No newline at end of file diff --git a/delete_fic.py b/delete_fic.py new file mode 100644 index 00000000..73722724 --- /dev/null +++ b/delete_fic.py @@ -0,0 +1,59 @@ +import os +import cgi +import sys +import logging +import traceback +import StringIO + +from google.appengine.api import users +from google.appengine.ext import webapp +from google.appengine.ext.webapp import util + +from fanficdownloader.downaloder import * +from fanficdownloader.ffnet import * +from fanficdownloader.output import * + +from google.appengine.ext import db + +from fanficdownloader.zipdir import * + +from ffstorage import * + +def create_mac(user, fic_id, fic_url): + return str(abs(hash(user)+hash(fic_id)))+str(abs(hash(fic_url))) + +def check_mac(user, fic_id, fic_url, mac): + return (create_mac(user, fic_id, fic_url) == mac) + +def create_mac_for_fic(user, fic_id): + key = db.Key(fic_id) + fanfic = db.get(key) + if fanfic.user != user: + return None + else: + return create_mac(user, key, fanfic.url) + +class DeleteFicHandler(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect('/login') + + fic_id = self.request.get('fic_id') + fic_mac = self.request.get('key_id') + + actual_mac = create_mac_for_fic(user, fic_id) + if actual_mac != fic_mac: + self.response.out.write("Ooops") + else: + key = db.Key(fic_id) + fanfic = db.get(key) + fanfic.delete() + self.redirect('/recent') + + + fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1", user) + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + \ No newline at end of file diff --git a/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py similarity index 100% rename from BeautifulSoup.py rename to fanficdownloader/BeautifulSoup.py diff --git a/__init__.py b/fanficdownloader/__init__.py similarity index 100% rename from __init__.py rename to fanficdownloader/__init__.py diff --git a/adapter.py b/fanficdownloader/adapter.py similarity index 100% rename from adapter.py rename to fanficdownloader/adapter.py diff --git a/constants.py b/fanficdownloader/constants.py similarity index 100% rename from constants.py rename to fanficdownloader/constants.py diff --git a/downloader.py b/fanficdownloader/downloader.py similarity index 100% rename from downloader.py rename to fanficdownloader/downloader.py diff --git a/ffnet.py b/fanficdownloader/ffnet.py similarity index 98% rename from ffnet.py rename to fanficdownloader/ffnet.py index 3963d529..4caa13fa 100644 --- a/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -276,9 +276,10 @@ class FFNet(FanfictionSiteAdapter): return urls def getText(self, url): - time.sleep( 2.0 ) + # time.sleep( 2.0 ) data = '' try: + logging.debug("Fetching URL: %s" % url) data = self.fetchUrl(url) except Exception, e: data = '' @@ -301,10 +302,12 @@ class FFNet(FanfictionSiteAdapter): try: soup = bs.BeautifulStoneSoup(data) except: + logging.debug(data) raise FailedToDownload("Error downloading Chapter: %s! Problem decoding page!" % url) div = soup.find('div', {'id' : 'storytext'}) if None == div: + logging.debug(data) raise FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) return div.__str__('utf8') diff --git a/fictionalley.py b/fanficdownloader/fictionalley.py similarity index 100% rename from fictionalley.py rename to fanficdownloader/fictionalley.py diff --git a/ficwad.py b/fanficdownloader/ficwad.py similarity index 100% rename from ficwad.py rename to fanficdownloader/ficwad.py diff --git a/fpcom.py b/fanficdownloader/fpcom.py similarity index 100% rename from fpcom.py rename to fanficdownloader/fpcom.py diff --git a/hpfiction.py b/fanficdownloader/hpfiction.py similarity index 100% rename from hpfiction.py rename to fanficdownloader/hpfiction.py diff --git a/html2text.py b/fanficdownloader/html2text.py similarity index 100% rename from html2text.py rename to fanficdownloader/html2text.py diff --git a/html_constants.py b/fanficdownloader/html_constants.py similarity index 100% rename from html_constants.py rename to fanficdownloader/html_constants.py diff --git a/mediaminer.py b/fanficdownloader/mediaminer.py similarity index 100% rename from mediaminer.py rename to fanficdownloader/mediaminer.py diff --git a/output.py b/fanficdownloader/output.py similarity index 100% rename from output.py rename to fanficdownloader/output.py diff --git a/potionsNsnitches.py b/fanficdownloader/potionsNsnitches.py similarity index 100% rename from potionsNsnitches.py rename to fanficdownloader/potionsNsnitches.py diff --git a/readme.txt b/fanficdownloader/readme.txt similarity index 100% rename from readme.txt rename to fanficdownloader/readme.txt diff --git a/twilighted.py b/fanficdownloader/twilighted.py similarity index 100% rename from twilighted.py rename to fanficdownloader/twilighted.py diff --git a/twipassword.py b/fanficdownloader/twipassword.py similarity index 100% rename from twipassword.py rename to fanficdownloader/twipassword.py diff --git a/zipdir.py b/fanficdownloader/zipdir.py similarity index 100% rename from zipdir.py rename to fanficdownloader/zipdir.py diff --git a/ffstorage.py b/ffstorage.py new file mode 100644 index 00000000..78647803 --- /dev/null +++ b/ffstorage.py @@ -0,0 +1,21 @@ +from google.appengine.ext import db + +class OneDownload(db.Model): + user = db.UserProperty() + url = db.StringProperty() + format = db.StringProperty() + login = db.StringProperty() + password = db.StringProperty() + failure = db.StringProperty() + date = db.DateTimeProperty(auto_now_add=True) + +class DownloadedFanfic(db.Model): + user = db.UserProperty() + url = db.StringProperty() + name = db.StringProperty() + author = db.StringProperty() + format = db.StringProperty() + date = db.DateTimeProperty(auto_now_add=True) + blob = db.BlobProperty() + mac = db.StringProperty() + cleared = db.BooleanProperty(default=False) \ No newline at end of file diff --git a/index-ajax.html b/index-ajax.html new file mode 100644 index 00000000..51dc6318 --- /dev/null +++ b/index-ajax.html @@ -0,0 +1,109 @@ + + + + + + + Fanfiction Downloader (fanfiction.net, fictionalley, ficwad to epub and HTML) + + + + + + + + + +
+

+ FanFiction Downloader +

+ + +
+
+ Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
+ +
+ Ebook format   +
+ +
+ +
+ + + +
+ + + +
+
+ +

+ Login and Password +

+
+ If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide your credentials to download it, otherwise just leave it empty +
+
+
+
Login
+
+
+ +
+
Password
+
+
+
+
+ + +
+ + +
+ +
+
+ Few things to know, which will make your life substantially easier: +
    +
  1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
  2. +
  3. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com
  4. +
  5. Paste a URL of the first chapter of the fanfic, not the index page
  6. +
  7. Fics with a single chapter are not supported (you can just copy and paste it)
  8. +
  9. Stories which are too long may not be downloaded correctly and application will report a time-out error — this is a limitation which is currently imposed by Google AppEngine on a long-running activities
  10. +
  11. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me
  12. +
  13. You can download fanfics and store them for 'later' by just downloading them and visiting recent downloads section, but in future they will be deleted after 5 days to save the space
  14. +
  15. If Downloader simply opens a download file window rather than saves the fanfic and gives you a link, it means it is too large to save in the database and you need to download it straight away
  16. +
  17. If you think that something that should work in fact doesn't, drop me a mail to sigizmund@gmail.com
  18. +
+ Otherwise, just have fun, and if you want to say thank you — use the email above. +
+
+ Powered by Google App Engine +

+ FanfictionLoader is a web front-end to fanficdownloader
+ Copyright © Roman Kirillov +
+ +
+ + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..4987804d --- /dev/null +++ b/index.html @@ -0,0 +1,189 @@ + + + + + Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com to epub and HTML to Stanza, Kindle, Nook, Sony Reader + + + + +
+

+ FanFiction Downloader +

+ +
+ + +
+ + {{yourfile}} + + + {% if authorized %} +
+
+
+ Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier. +
    +
  • + For fictionalley.org, please paste the URL of the story's chapter list in the box, such as + this. Or the story text URL for + fictionalley.org one-shots, such + as this. +
  • +
  • + For all other supported sites, please paste the URL of the first chapter in the box. For + one-shots, the first chapter is the whole story. +
  • +
  • + Alternatively, see your personal list of previously downloaded fanfics. +
  • +
+
+
+ {{ error_message }} +
+ +
+ +
+
Ebook format
+
+ EPub + HTML + Plain Text +
+
+ +

Login and Password

+
+ + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide + your credentials to download it, otherwise just leave it empty +
+
+
+
Login
+
+
+ +
+
Password
+
+
+
+
+ +
+
+ {% else %} +
+
+

+ This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you + can start downloading fanfics, you need to login, so downloader can remember your fanfics and store them. +

+

Login using Google account

+
+
+ {% endif %} + +
+ Few things to know, which will make your life substantially easier: +
    +
  1. + First thing to know: I do not use your login and password. In fact, all I know about it is your ID – password + is being verified by Google and is absolutely, totally unknown to anyone but you. +
  2. +
  3. + Small post written by me + — how to read fiction in Stanza or any other ebook reader. +
  4. +
  5. + Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, and twilighted.net. + (fanficauthors.net withdrawn as they offer native ePub functionality now.) +
  6. +
  7. + You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader. +
  8. +
  9. + Paste a URL of the first chapter of the fanfic, not the index page, except for fictionalley.org. +
  10. +
  11. + For fictionalley.org, you need to use the URL of the story's chapter list, such as + this. Or the story text URL for fictionalley.org + one-shots, such as this. +
  12. +
  13. + One-shots, fics with a single chapter, are now supported. +
  14. +
  15. + You can download fanfics and store them for 'later' by just downloading them and visiting recent + downloads section. +
  16. +
  17. + Downloaded stories are deleted after some time (which should give you enough of time to download it and will keep + Google happy about the app not going over the storage limit). +
  18. +
  19. + If Downloader simply opens a download file window rather than saves the fanfic and gives you a link, it means it is + too large to save in the database and you need to download it straight away. +
  20. +
  21. + If you see some funny characters in downloaded Plain Text file, make sure you choose text file encoding UTF-8 and + not something else. +
  22. +
  23. + If you think that something that should work in fact doesn't, drop me a mail + to sigizmund@gmail.com, or, even better, write an email to + our Google Group. I also encourage you to join it so + you will find out about latest updates and fixes as soon as possible +
  24. +
+ Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
+
+ Powered by Google App Engine +

+ FanfictionLoader is a web front-end to fanficdownloader
+ Copyright © Roman Kirillov +
+ +
+ + +
+ +
+ + + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..bbed2dff --- /dev/null +++ b/index.yaml @@ -0,0 +1,22 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
" + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. + undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." + name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? + this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). + var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? + (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). + head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? + all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "", "
" ] || + + !tags.indexOf("", "" ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + + + + + {{yourfile}} + + +
+
+ Hi, {{ nickname }}! These fanfics you've downloaded previously. +
+
+ +
+ {% for fic in fics %} +

{{ fic.name }} by {{ fic.author }} ({{ fic.format }})
{{ fic.url }}

+ {% endfor %} +
+ + + + + + + + + + diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 00000000..d5b4d399 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(o) + " is not JSON serializable") + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito ' + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100644 index 00000000..f01003d4 Binary files /dev/null and b/simplejson/__init__.pyc differ diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; + PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *skipkeys; + int fast_encode; + int allow_nan; +} PyEncoderObject; + +static PyMemberDef encoder_members[] = { + {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, + {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, + {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, + {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, + {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, + {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + {NULL} +}; + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); +void init_speedups(void); +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +scanner_dealloc(PyObject *self); +static int +scanner_clear(PyObject *self); +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +encoder_dealloc(PyObject *self); +static int +encoder_clear(PyObject *self); +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *const); +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) + +#define MIN_EXPANSION 6 +#ifdef Py_UNICODE_WIDE +#define MAX_EXPANSION (2 * MIN_EXPANSION) +#else +#define MAX_EXPANSION MIN_EXPANSION +#endif + +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) +{ + /* PyObject to Py_ssize_t converter */ + *size_ptr = PyInt_AsSsize_t(o); + if (*size_ptr == -1 && PyErr_Occurred()); + return 1; + return 0; +} + +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if (pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } + else if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON constant from PyString pystr. + constant is the constant string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. + + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyString_FromStringAndSize(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); + } + } + else { + /* parse as an int using a fast path if available, otherwise call user defined method */ + if (s->parse_int != (PyObject *)&PyInt_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + else { + rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); + } + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyUnicode pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->sort_keys = NULL; + s->skipkeys = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + + PyEncoderObject *s; + PyObject *allow_nan; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, + &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + return -1; + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->sort_keys); + Py_INCREF(s->skipkeys); + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + PyObject *rval; + Py_ssize_t indent_level; + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + rval = PyList_New(0); + if (rval == NULL) + return NULL; + if (encoder_listencode_obj(s, rval, obj, indent_level)) { + Py_DECREF(rval); + return NULL; + } + return rval; +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = PyString_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = PyString_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = PyString_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (i > 0) { + return PyString_FromString("Infinity"); + } + else if (i < 0) { + return PyString_FromString("-Infinity"); + } + else { + return PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast = NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column {2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. + + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + # Content is contains zero or more unescaped string characters + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows + if terminator == '"': + break + elif terminator != '\\': + if strict: + msg = "Invalid control character %r at" % (terminator,) + #msg = "Invalid control character {0!r} at".format(terminator) + raise ValueError(errmsg(msg, s, end)) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + # If not a unicode escape sequence, must be in the lookup table + if esc != 'u': + try: + char = _b[esc] + except KeyError: + msg = "Invalid \\escape: " + repr(esc) + raise ValueError(errmsg(msg, s, end)) + end += 1 + else: + # Unicode escape sequence + esc = s[end + 1:end + 5] + next_end = end + 5 + if len(esc) != 4: + msg = "Invalid \\uXXXX escape" + raise ValueError(errmsg(msg, s, end)) + uni = int(esc, 16) + # Check for surrogate pair on UCS-4 systems + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError(errmsg(msg, s, end)) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError(errmsg(msg, s, end)) + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + char = unichr(uni) + end = next_end + # Append the unescaped character + _append(char) + return u''.join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + +def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + pairs = {} + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty + nextchar = s[end:end + 1] + # Normally we expect nextchar == '"' + if nextchar != '"': + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + elif nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + while True: + key, end = scanstring(s, end, encoding, strict) + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". + if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + """ + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.scan_once = make_scanner(self) + + def decode(self, s, _w=WHITESPACE.match): + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + + """ + try: + obj, end = self.scan_once(s, idx) + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc new file mode 100644 index 00000000..2ae9b359 Binary files /dev/null and b/simplejson/decoder.pyc differ diff --git a/simplejson/encoder.py b/simplejson/encoder.py new file mode 100644 index 00000000..cf582903 --- /dev/null +++ b/simplejson/encoder.py @@ -0,0 +1,440 @@ +"""Implementation of JSONEncoder +""" +import re + +try: + from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii +except ImportError: + c_encode_basestring_ascii = None +try: + from simplejson._speedups import make_encoder as c_make_encoder +except ImportError: + c_make_encoder = None + +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +# Assume this produces an infinity on all machines (probably not guaranteed) +INFINITY = float('1e66666') +FLOAT_REPR = repr + +def encode_basestring(s): + """Return a JSON representation of a Python string + + """ + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + + +def py_encode_basestring_ascii(s): + """Return an ASCII-only JSON representation of a Python string + + """ + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + #return '\\u{0:04x}'.format(n) + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii + +class JSONEncoder(object): + """Extensible JSON encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + + """ + item_separator = ', ' + key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None): + """Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is false, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is true, the output is guaranteed to be str + objects with all incoming unicode characters escaped. If + ensure_ascii is false, the output will be unicode object. + + If check_circular is true, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is true, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is true, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a non-negative integer, then JSON array + elements and object members will be pretty-printed with that + indent level. An indent level of 0 will only insert newlines. + None is the most compact representation. + + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. + + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. + + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, (int, long)): + key = str(key) + elif _skipkeys: + continue + else: + raise TypeError("key " + repr(key) + " is not a string") + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, basestring): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, (int, long)): + yield str(value) + elif isinstance(value, float): + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, basestring): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] + + return _iterencode diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc new file mode 100644 index 00000000..e59d372a Binary files /dev/null and b/simplejson/encoder.pyc differ diff --git a/simplejson/scanner.py b/simplejson/scanner.py new file mode 100644 index 00000000..adbc6ec9 --- /dev/null +++ b/simplejson/scanner.py @@ -0,0 +1,65 @@ +"""JSON token scanner +""" +import re +try: + from simplejson._speedups import make_scanner as c_make_scanner +except ImportError: + c_make_scanner = None + +__all__ = ['make_scanner'] + +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) + +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + encoding = context.encoding + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook + + def _scan_once(string, idx): + try: + nextchar = string[idx] + except IndexError: + raise StopIteration + + if nextchar == '"': + return parse_string(string, idx + 1, encoding, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 'true': + return True, idx + 4 + elif nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 + + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() + if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise StopIteration + + return _scan_once + +make_scanner = c_make_scanner or py_make_scanner diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc new file mode 100644 index 00000000..30d94445 Binary files /dev/null and b/simplejson/scanner.pyc differ diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py new file mode 100644 index 00000000..17c97963 --- /dev/null +++ b/simplejson/tests/__init__.py @@ -0,0 +1,23 @@ +import unittest +import doctest + +def additional_tests(): + import simplejson + import simplejson.encoder + import simplejson.decoder + suite = unittest.TestSuite() + for mod in (simplejson, simplejson.encoder, simplejson.decoder): + suite.addTest(doctest.DocTestSuite(mod)) + suite.addTest(doctest.DocFileSuite('../../index.rst')) + return suite + +def main(): + suite = additional_tests() + runner = unittest.TextTestRunner() + runner.run(suite) + +if __name__ == '__main__': + import os + import sys + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + main() diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py new file mode 100644 index 00000000..af6463d6 --- /dev/null +++ b/simplejson/tests/test_check_circular.py @@ -0,0 +1,30 @@ +from unittest import TestCase +import simplejson as json + +def default_iterable(obj): + return list(obj) + +class TestCheckCircular(TestCase): + def test_circular_dict(self): + dct = {} + dct['a'] = dct + self.assertRaises(ValueError, json.dumps, dct) + + def test_circular_list(self): + lst = [] + lst.append(lst) + self.assertRaises(ValueError, json.dumps, lst) + + def test_circular_composite(self): + dct2 = {} + dct2['a'] = [] + dct2['a'].append(dct2) + self.assertRaises(ValueError, json.dumps, dct2) + + def test_circular_default(self): + json.dumps([set()], default=default_iterable) + self.assertRaises(TypeError, json.dumps, [set()]) + + def test_circular_off_default(self): + json.dumps([set()], default=default_iterable, check_circular=False) + self.assertRaises(TypeError, json.dumps, [set()], check_circular=False) diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py new file mode 100644 index 00000000..1cd701d4 --- /dev/null +++ b/simplejson/tests/test_decode.py @@ -0,0 +1,22 @@ +import decimal +from unittest import TestCase + +import simplejson as json + +class TestDecode(TestCase): + def test_decimal(self): + rval = json.loads('1.1', parse_float=decimal.Decimal) + self.assert_(isinstance(rval, decimal.Decimal)) + self.assertEquals(rval, decimal.Decimal('1.1')) + + def test_float(self): + rval = json.loads('1', parse_int=float) + self.assert_(isinstance(rval, float)) + self.assertEquals(rval, 1.0) + + def test_decoder_optimizations(self): + # Several optimizations were made that skip over calls to + # the whitespace regex, so this test is designed to try and + # exercise the uncommon cases. The array cases are already covered. + rval = json.loads('{ "key" : "value" , "k":"v" }') + self.assertEquals(rval, {"key":"value", "k":"v"}) diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py new file mode 100644 index 00000000..139e42bf --- /dev/null +++ b/simplejson/tests/test_default.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +import simplejson as json + +class TestDefault(TestCase): + def test_default(self): + self.assertEquals( + json.dumps(type, default=repr), + json.dumps(repr(type))) diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py new file mode 100644 index 00000000..4de37cf4 --- /dev/null +++ b/simplejson/tests/test_dump.py @@ -0,0 +1,21 @@ +from unittest import TestCase +from cStringIO import StringIO + +import simplejson as json + +class TestDump(TestCase): + def test_dump(self): + sio = StringIO() + json.dump({}, sio) + self.assertEquals(sio.getvalue(), '{}') + + def test_dumps(self): + self.assertEquals(json.dumps({}), '{}') + + def test_encode_truefalse(self): + self.assertEquals(json.dumps( + {True: False, False: True}, sort_keys=True), + '{"false": true, "true": false}') + self.assertEquals(json.dumps( + {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), + '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py new file mode 100644 index 00000000..7128495f --- /dev/null +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -0,0 +1,38 @@ +from unittest import TestCase + +import simplejson.encoder + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBaseStringAscii(TestCase): + def test_py_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii) + + def test_c_encode_basestring_ascii(self): + if not simplejson.encoder.c_encode_basestring_ascii: + return + self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii) + + def _test_encode_basestring_ascii(self, encode_basestring_ascii): + fname = encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = encode_basestring_ascii(input_string) + self.assertEquals(result, expect, + '%r != %r for %s(%r)' % (result, expect, fname, input_string)) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py new file mode 100644 index 00000000..002eea08 --- /dev/null +++ b/simplejson/tests/test_fail.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json + +# Fri Dec 30 18:57:26 2005 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '["Illegal backslash escape: \\\'"]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except ValueError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 00000000..1a2b98a2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,15 @@ +import math +from unittest import TestCase + +import simplejson as json + +class TestFloat(TestCase): + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: + self.assertEquals(float(json.dumps(num)), num) + self.assertEquals(json.loads(json.dumps(num)), num) + + def test_ints(self): + for num in [1, 1L, 1<<32, 1<<64]: + self.assertEquals(json.dumps(num), str(num)) + self.assertEquals(int(json.dumps(num)), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 00000000..66e19b9e --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,41 @@ +from unittest import TestCase + +import simplejson as json +import textwrap + +class TestIndent(TestCase): + def test_indent(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py new file mode 100644 index 00000000..c3d6302d --- /dev/null +++ b/simplejson/tests/test_pass1.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass1.json +JSON = r''' +[ + "JSON Test Pattern pass1", + {"object with 1 member":["array with 1 element"]}, + {}, + [], + -42, + true, + false, + null, + { + "integer": 1234567890, + "real": -9876.543210, + "e": 0.123456789e-12, + "E": 1.234567890E+34, + "": 23456789012E666, + "zero": 0, + "one": 1, + "space": " ", + "quote": "\"", + "backslash": "\\", + "controls": "\b\f\n\r\t", + "slash": "/ & \/", + "alpha": "abcdefghijklmnopqrstuvwyz", + "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", + "digit": "0123456789", + "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", + "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", + "true": true, + "false": false, + "null": null, + "array":[ ], + "object":{ }, + "address": "50 St. James Street", + "url": "http://www.JSON.org/", + "comment": "// /* */": " ", + " s p a c e d " :[1,2 , 3 + +, + +4 , 5 , 6 ,7 ], + "compact": [1,2,3,4,5,6,7], + "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", + "quotes": "" \u0022 %22 0x22 034 "", + "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" +: "A key can be any string" + }, + 0.5 ,98.6 +, +99.44 +, + +1066 + + +,"rosebud"] +''' + +class TestPass1(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) + try: + json.dumps(res, allow_nan=False) + except ValueError: + pass + else: + self.fail("23456789012E666 should be out of range") diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py new file mode 100644 index 00000000..de4ee00b --- /dev/null +++ b/simplejson/tests/test_pass2.py @@ -0,0 +1,14 @@ +from unittest import TestCase +import simplejson as json + +# from http://json.org/JSON_checker/test/pass2.json +JSON = r''' +[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] +''' + +class TestPass2(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py new file mode 100644 index 00000000..f591aba9 --- /dev/null +++ b/simplejson/tests/test_pass3.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass3.json +JSON = r''' +{ + "JSON Test Pattern pass3": { + "The outermost value": "must be an object or array.", + "In this test": "It is an object." + } +} +''' + +class TestPass3(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py new file mode 100644 index 00000000..97422a66 --- /dev/null +++ b/simplejson/tests/test_recursion.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + +class JSONTestObject: + pass + + +class RecursiveJSONEncoder(json.JSONEncoder): + recurse = False + def default(self, o): + if o is JSONTestObject: + if self.recurse: + return [JSONTestObject] + else: + return 'JSONTestObject' + return json.JSONEncoder.default(o) + + +class TestRecursion(TestCase): + def test_listrecursion(self): + x = [] + x.append(x) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on list recursion") + x = [] + y = [x] + x.append(y) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on alternating list recursion") + y = [] + x = [y, y] + # ensure that the marker is cleared + json.dumps(x) + + def test_dictrecursion(self): + x = {} + x["test"] = x + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on dict recursion") + x = {} + y = {"a": x, "b": x} + # ensure that the marker is cleared + json.dumps(x) + + def test_defaultrecursion(self): + enc = RecursiveJSONEncoder() + self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"') + enc.recurse = True + try: + enc.encode(JSONTestObject) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on default recursion") diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py new file mode 100644 index 00000000..b08dec71 --- /dev/null +++ b/simplejson/tests/test_scanstring.py @@ -0,0 +1,111 @@ +import sys +import decimal +from unittest import TestCase + +import simplejson as json +import simplejson.decoder + +class TestScanString(TestCase): + def test_py_scanstring(self): + self._test_scanstring(simplejson.decoder.py_scanstring) + + def test_c_scanstring(self): + if not simplejson.decoder.c_scanstring: + return + self._test_scanstring(simplejson.decoder.c_scanstring) + + def _test_scanstring(self, scanstring): + self.assertEquals( + scanstring('"z\\ud834\\udd20x"', 1, None, True), + (u'z\U0001d120x', 16)) + + if sys.maxunicode == 65535: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEquals( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEquals( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEquals( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEquals( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEquals( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEquals( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEquals( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEquals( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra comma', 14)) + + self.assertEquals( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEquals( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEquals( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEquals( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEquals( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEquals( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEquals( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEquals( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEquals( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEquals( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEquals( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, "xx\xff") diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 00000000..8fa0dac6 --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 00000000..6f4384a5 --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,64 @@ +from unittest import TestCase + +import simplejson as json + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEquals(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEquals(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEquals(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEquals(j, '["\\u03b1\\u03a9"]') + + def test_encoding5(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u, ensure_ascii=False) + self.assertEquals(j, u'"%s"' % (u,)) + + def test_encoding6(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u], ensure_ascii=False) + self.assertEquals(j, u'["%s"]' % (u,)) + + def test_big_unicode_encode(self): + u = u'\U0001d120' + self.assertEquals(json.dumps(u), '"\\ud834\\udd20"') + self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') + + def test_big_unicode_decode(self): + u = u'z\U0001d120x' + self.assertEquals(json.loads('"' + u + '"'), u) + self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u) + + def test_unicode_decode(self): + for i in range(0, 0xd7ff): + u = unichr(i) + s = '"\\u%04x"' % (i,) + self.assertEquals(json.loads(s), u) + + def test_default_encoding(self): + self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), + {'a': u'\xe9'}) + + def test_unicode_preservation(self): + self.assertEquals(type(json.loads(u'""')), unicode) + self.assertEquals(type(json.loads(u'"a"')), unicode) + self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) \ No newline at end of file diff --git a/simplejson/tool.py b/simplejson/tool.py new file mode 100644 index 00000000..90443317 --- /dev/null +++ b/simplejson/tool.py @@ -0,0 +1,37 @@ +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) + +""" +import sys +import simplejson + +def main(): + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'rb') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'rb') + outfile = open(sys.argv[2], 'wb') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + try: + obj = simplejson.load(infile) + except ValueError, e: + raise SystemExit(e) + simplejson.dump(obj, outfile, sort_keys=True, indent=4) + outfile.write('\n') + + +if __name__ == '__main__': + main() diff --git a/static/ajax-loader.gif b/static/ajax-loader.gif new file mode 100644 index 00000000..f16ebf7c Binary files /dev/null and b/static/ajax-loader.gif differ diff --git a/static/favicon.ico b/static/favicon.ico new file mode 100644 index 00000000..ad4ca66a Binary files /dev/null and b/static/favicon.ico differ diff --git a/utils/remover.py b/utils/remover.py new file mode 100644 index 00000000..327db984 --- /dev/null +++ b/utils/remover.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +remover.py + +Created by Roman on 2010-06-20. +Copyright (c) 2010 __MyCompanyName__. All rights reserved. +""" + +import datetime +import logging + +from google.appengine.ext.webapp import util +from google.appengine.ext import webapp +from google.appengine.api import users + +from ffstorage import * + +class Remover(webapp.RequestHandler): + def get(self): + logging.debug("Starting r3m0v3r") + user = users.get_current_user() + logging.debug("Working as user %s" % user) + theDate = datetime.date.today() - datetime.timedelta(days=2) + logging.debug("Will delete stuff older than %s" % theDate) + + fics = DownloadedFanfic.all() + fics.order("date") + + results = fics.fetch(50) + + + logging.debug([x.name for x in results]) + + num = 0 + for d in results: +# d.blob = None +# d.cleared = True + d.delete() + num = num + 1 + logging.info('Deleted instances: %d' % num) + self.response.out.write('Deleted instances: %d' % num) + + +def main(): + application = webapp.WSGIApplication([('/r3m0v3r', Remover)], + debug=False) + util.run_wsgi_app(application) + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.DEBUG) + main() \ No newline at end of file