diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a064c00 --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +# +# Project: speedtest-cli +# Created by George "walkero" Sokianos +# 2022-07-31 +# + +release: clean + mkdir -p release/speedtest-cli + cp -r release_files/* release/speedtest-cli/ + cp speedtest.py release/speedtest-cli/speedtest-cli + protect release/speedtest-cli/speedtest-cli srwed + cp -r simplejson release/speedtest-cli/ + rm -rf release/speedtest-cli/simplejson/tests + cp README-OS4.md release/speedtest-cli/ + cp README.rst release/speedtest-cli/ + cp LICENSE release/speedtest-cli/ + cp release_files/Icons/cha05e90/speedtest-cli.info release/speedtest-cli.info + lha -aeqr3 a speedtest-cli.lha release/ + rm -rf release + +clean: + rm -f simplejson/#?.pyc + diff --git a/README-OS4.md b/README-OS4.md new file mode 100644 index 0000000..ea9742f --- /dev/null +++ b/README-OS4.md @@ -0,0 +1,24 @@ +speedtest-cli +--------------------------------------------------------------------------- + +This is a simple terminal script that uses speedtest.net API. This is +based on Python, so it should work on any operating system supporting +Python v2.4 and above. This is created by Matt Martz and can be found +at his GitHub repository at https://github.com/sivel/speedtest-cli + +I adapted it to work with AmigaOS 4. To use it just double click its +icon and a new shell window will open. Automatically a close server +will be chosen and it will measure the download and upload speed of +your system. + +More parameters are available for the script and you can find them +in the included document README.rst. 
+ +The icons are designed by Frank Ruthe and can be found at +http://os4depot.net/?function=showfile&file=graphics/icon/cha05e90_icons_2012.lha + +Changelog +-------------------------- +v2.1.3r1 - 2022-07-31 +* First release + diff --git a/release_files/LICENSE.info b/release_files/LICENSE.info new file mode 100644 index 0000000..c5307b6 Binary files /dev/null and b/release_files/LICENSE.info differ diff --git a/release_files/README-OS4.md.info b/release_files/README-OS4.md.info new file mode 100644 index 0000000..a512187 Binary files /dev/null and b/release_files/README-OS4.md.info differ diff --git a/release_files/README.rst.info b/release_files/README.rst.info new file mode 100644 index 0000000..b902dbe Binary files /dev/null and b/release_files/README.rst.info differ diff --git a/release_files/icons/cha05e90.info b/release_files/icons/cha05e90.info new file mode 100644 index 0000000..2cd8b6f Binary files /dev/null and b/release_files/icons/cha05e90.info differ diff --git a/release_files/icons/cha05e90/speedtest-cli.info b/release_files/icons/cha05e90/speedtest-cli.info new file mode 100644 index 0000000..44f0d25 Binary files /dev/null and b/release_files/icons/cha05e90/speedtest-cli.info differ diff --git a/release_files/icons/cha05e90/speedtest.py.info b/release_files/icons/cha05e90/speedtest.py.info new file mode 100644 index 0000000..c57aeeb Binary files /dev/null and b/release_files/icons/cha05e90/speedtest.py.info differ diff --git a/release_files/speedtest-cli.info b/release_files/speedtest-cli.info new file mode 100644 index 0000000..b678a97 Binary files /dev/null and b/release_files/speedtest-cli.info differ diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 0000000..0556a7a --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,577 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. 
+ +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility back to Python 2.5 and (currently) has significant performance +advantages, even without using the optional C extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print(json.dumps("\"foo\bar")) + "\"foo\bar" + >>> print(json.dumps(u'\u1234')) + "\u1234" + >>> print(json.dumps('\\')) + "\\" + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) + {"a": 0, "b": 0, "c": 0} + >>> from simplejson.compat import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from simplejson.compat import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... 
object_hook=as_complex) + (1+2j) + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError('Object of type %s is not JSON serializable' % + ... obj.__class__.__name__) + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 3 (char 2) +""" +from __future__ import absolute_import +__version__ = '3.16.0' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', + 'OrderedDict', 'simple_first', 'RawJSON' +] + +__author__ = 'Bob Ippolito ' + +from decimal import Decimal + +from .errors import JSONDecodeError +from .raw_json import RawJSON +from .decoder import JSONDecoder +from .encoder import JSONEncoder, JSONEncoderForHTML +def _import_OrderedDict(): + import collections + try: + return collections.OrderedDict + except AttributeError: + from . 
import ordered_dict + return ordered_dict.OrderedDict +OrderedDict = _import_OrderedDict() + +def _import_c_make_encoder(): + try: + from ._speedups import make_encoder + return make_encoder + except ImportError: + return None + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + use_decimal=True, + namedtuple_as_object=True, + tuple_as_array=True, + iterable_as_array=False, + bigint_as_string=False, + item_sort_key=None, + for_json=False, + ignore_nan=False, + int_as_string_bitcount=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If *skipkeys* is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If *ensure_ascii* is false, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If *check_circular* is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). 
+ + If *allow_nan* is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the original JSON specification, instead of using + the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + *ignore_nan* for ECMA-262 compliant behavior. + + If *indent* is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. + + If specified, *separators* should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. + + *encoding* is the character encoding for str instances, default is UTF-8. + + *default(obj)* is a function that should return a serializable version + of obj or raise ``TypeError``. The default simply raises ``TypeError``. + + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. 
This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead + of subclassing whenever possible. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, + **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. 
+ + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. + + If specified, ``separators`` should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. 
+ + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If *bigint_as_string* is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precendence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing + whenever possible. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, + **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. 
+ + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. 
+ + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + use_decimal=use_decimal, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. 
By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. + + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal + return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): + from . import decoder as dec + from . import encoder as enc + from . 
import scanner as scan + c_make_encoder = _import_c_make_encoder() + if enabled: + dec.scanstring = dec.c_scanstring or dec.py_scanstring + enc.c_make_encoder = c_make_encoder + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.py_encode_basestring_ascii) + scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner + else: + dec.scanstring = dec.py_scanstring + enc.c_make_encoder = None + enc.encode_basestring_ascii = enc.py_encode_basestring_ascii + scan.make_scanner = scan.py_make_scanner + dec.make_scanner = scan.make_scanner + global _default_decoder + _default_decoder = JSONDecoder( + encoding=None, + object_hook=None, + object_pairs_hook=None, + ) + global _default_encoder + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. 
+ """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 0000000..e710128 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,3384 @@ +/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */ +#include "Python.h" +#include "structmember.h" + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_FromSsize_t PyLong_FromSsize_t +#define PyInt_AsSsize_t PyLong_AsSsize_t +#define PyInt_Check(obj) 0 +#define PyInt_CheckExact(obj) 0 +#define JSON_UNICHR Py_UCS4 +#define JSON_InternFromString PyUnicode_InternFromString +#define PyString_GET_SIZE PyUnicode_GET_LENGTH +#define PY2_UNUSED +#define PY3_UNUSED UNUSED +#else /* PY_MAJOR_VERSION >= 3 */ +#define PY2_UNUSED UNUSED +#define PY3_UNUSED +#define PyBytes_Check PyString_Check +#define PyUnicode_READY(obj) 0 +#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE)) +#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj))) +#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)]) +#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE +#define JSON_UNICHR Py_UNICODE +#define JSON_InternFromString PyString_InternFromString +#endif /* PY_MAJOR_VERSION < 3 */ + +#if PY_VERSION_HEX < 0x02070000 +#if !defined(PyOS_string_to_double) +#define PyOS_string_to_double json_PyOS_string_to_double +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) +{ + double x; + assert(endptr == NULL); + assert(overflow_exception == NULL); + PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;) + x = PyOS_ascii_atof(s); + PyFPE_END_PROTECT(x) + return x; +} +#endif +#endif /* PY_VERSION_HEX < 0x02070000 */ + +#if PY_VERSION_HEX < 0x02060000 +#if !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if !defined(Py_SIZE) +#define Py_SIZE(ob) 
(((PyVarObject*)(ob))->ob_size) +#endif +#if !defined(PyVarObject_HEAD_INIT) +#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif +#endif /* PY_VERSION_HEX < 0x02060000 */ + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +#define JSON_ALLOW_NAN 1 +#define JSON_IGNORE_NAN 2 + +static PyObject *JSON_Infinity = NULL; +static PyObject *JSON_NegInfinity = NULL; +static PyObject *JSON_NaN = NULL; +static PyObject *JSON_EmptyUnicode = NULL; +#if PY_MAJOR_VERSION < 3 +static PyObject *JSON_EmptyStr = NULL; +#endif + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct { + PyObject *large_strings; /* A list of previously accumulated large strings */ + PyObject *small_strings; /* Pending small strings */ +} JSON_Accu; + +static int +JSON_Accu_Init(JSON_Accu *acc); +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode); +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc); +static void +JSON_Accu_Destroy(JSON_Accu *acc); + +#define ERR_EXPECTING_VALUE "Expecting value" +#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'" +#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'" +#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'" +#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes" +#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'" +#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter" +#define ERR_STRING_UNTERMINATED "Unterminated string starting at" +#define ERR_STRING_CONTROL "Invalid control character %r at" +#define ERR_STRING_ESC1 "Invalid \\X 
escape sequence %r" +#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence" + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict_bool; + int strict; + PyObject *object_hook; + PyObject *pairs_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; + PyObject *memo; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict_bool), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; + PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *key_memo; + PyObject *encoding; + PyObject *Decimal; + PyObject *skipkeys_bool; + int skipkeys; + int fast_encode; + /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */ + int allow_or_ignore_nan; + int use_decimal; + int namedtuple_as_object; + int tuple_as_array; + int iterable_as_array; + PyObject *max_long_size; + PyObject *min_long_size; + PyObject *item_sort_key; + PyObject *item_sort_kw; + int for_json; +} PyEncoderObject; + +static PyMemberDef encoder_members[] = { + {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, + {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, + {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, 
"encoder"}, + {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"}, + {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, + {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, + {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, + {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, + /* Python 2.5 does not support T_BOOl */ + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"}, + {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, + {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"}, + {"max_long_size", T_OBJECT, offsetof(PyEncoderObject, max_long_size), READONLY, "max_long_size"}, + {"min_long_size", T_OBJECT, offsetof(PyEncoderObject, min_long_size), READONLY, "min_long_size"}, + {NULL} +}; + +static PyObject * +join_list_unicode(PyObject *lst); +static PyObject * +JSON_ParseEncoding(PyObject *encoding); +static PyObject * +maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj); +static Py_ssize_t +ascii_char_size(JSON_UNICHR c); +static Py_ssize_t +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); +#if PY_MAJOR_VERSION < 3 +static PyObject * +join_list_string(PyObject *lst); +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +#endif +static 
PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr); +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static void +scanner_dealloc(PyObject *self); +static int +scanner_clear(PyObject *self); +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static void +encoder_dealloc(PyObject *self); +static int +encoder_clear(PyObject *self); +static int +is_raw_json(PyObject *obj); +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key); +static int +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level); +static int +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level); +static int +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *obj); +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj); +static int +_is_namedtuple(PyObject *obj); +static int +_has_for_json_hook(PyObject *obj); +static PyObject * +moduleinit(void); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) + +#define MIN_EXPANSION 6 + +static PyObject* RawJSONType = NULL; +static int +is_raw_json(PyObject *obj) +{ + return PyObject_IsInstance(obj, RawJSONType) ? 
1 : 0; +} + +static int +JSON_Accu_Init(JSON_Accu *acc) +{ + /* Lazily allocated */ + acc->large_strings = NULL; + acc->small_strings = PyList_New(0); + if (acc->small_strings == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(JSON_Accu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large_strings == NULL) { + acc->large_strings = PyList_New(0); + if (acc->large_strings == NULL) + return -1; + } +#if PY_MAJOR_VERSION >= 3 + joined = join_list_unicode(acc->small_strings); +#else /* PY_MAJOR_VERSION >= 3 */ + joined = join_list_string(acc->small_strings); +#endif /* PY_MAJOR_VERSION < 3 */ + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large_strings, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(unicode)); +#else /* PY_MAJOR_VERSION >= 3 */ + assert(PyString_Check(unicode) || PyUnicode_Check(unicode)); +#endif /* PY_MAJOR_VERSION < 3 */ + + if (PyList_Append(acc->small_strings, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small_strings); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. 
+ */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small_strings); + if (ret) { + Py_CLEAR(acc->large_strings); + return NULL; + } + res = acc->large_strings; + acc->large_strings = NULL; + if (res == NULL) + return PyList_New(0); + return res; +} + +static void +JSON_Accu_Destroy(JSON_Accu *acc) +{ + Py_CLEAR(acc->small_strings); + Py_CLEAR(acc->large_strings); +} + +static int +IS_DIGIT(JSON_UNICHR c) +{ + return c >= '0' && c <= '9'; +} + +static PyObject * +maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj) +{ + if (s->max_long_size != Py_None && s->min_long_size != Py_None) { + if (PyObject_RichCompareBool(obj, s->max_long_size, Py_GE) || + PyObject_RichCompareBool(obj, s->min_long_size, Py_LE)) { +#if PY_MAJOR_VERSION >= 3 + PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); +#else + PyObject* quoted = PyString_FromFormat("\"%s\"", + PyString_AsString(encoded)); +#endif + Py_DECREF(encoded); + encoded = quoted; + } + } + + return encoded; +} + +static int +_is_namedtuple(PyObject *obj) +{ + int rval = 0; + PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict"); + if (_asdict == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(_asdict); + Py_DECREF(_asdict); + return rval; +} + +static int +_has_for_json_hook(PyObject *obj) +{ + int rval = 0; + PyObject *for_json = PyObject_GetAttrString(obj, "for_json"); + if (for_json == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(for_json); + Py_DECREF(for_json); + return rval; +} + +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) +{ + /* PyObject to Py_ssize_t converter */ + *size_ptr = PyInt_AsSsize_t(o); + if (*size_ptr == -1 && PyErr_Occurred()) + return 0; + return 1; +} + +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t 
to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + JSON_UNICHR v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + } + return chars; +} + +static Py_ssize_t +ascii_char_size(JSON_UNICHR c) +{ + if (S_CHAR(c)) { + return 1; + } + else if (c == '\\' || + c == '"' || + c == '\b' || + c == '\f' || + c == '\n' || + c == '\r' || + c == '\t') { + return 2; + } +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) + else if (c >= 0x10000U) { + return 2 * MIN_EXPANSION; + } +#endif + else { + return MIN_EXPANSION; + } +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and 
return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr); + Py_ssize_t output_size = 2; + Py_ssize_t chars; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *data = PyUnicode_DATA(pystr); + PyObject *rval; + char *output; + + output_size = 2; + for (i = 0; i < input_chars; i++) { + output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); + } +#if PY_MAJOR_VERSION >= 3 + rval = PyUnicode_New(output_size, 127); + if (rval == NULL) { + return NULL; + } + assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND); + output = (char *)PyUnicode_DATA(rval); +#else + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); +#endif + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} + +#if PY_MAJOR_VERSION >= 3 + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + PyObject *rval; + PyObject *input = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(pystr), PyBytes_GET_SIZE(pystr), NULL); + if (input == NULL) + return NULL; + rval = ascii_escape_unicode(input); + Py_DECREF(input); + return rval; +} + +#else /* PY_MAJOR_VERSION >= 3 */ + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + output_size = 2; + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + JSON_UNICHR c = (JSON_UNICHR)input_str[i]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = 
PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + output_size += ascii_char_size(c); + } + + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + chars = 0; + output = PyString_AS_STRING(rval); + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key) +{ + if (PyUnicode_Check(key)) { + Py_INCREF(key); + return key; + } +#if PY_MAJOR_VERSION >= 3 + else if (PyBytes_Check(key) && s->encoding != NULL) { + const char *encoding = PyUnicode_AsUTF8(s->encoding); + if (encoding == NULL) + return NULL; + return PyUnicode_Decode( + PyBytes_AS_STRING(key), + PyBytes_GET_SIZE(key), + encoding, + NULL); + } +#else /* PY_MAJOR_VERSION >= 3 */ + else if (PyString_Check(key)) { + Py_INCREF(key); + return key; + } +#endif /* PY_MAJOR_VERSION < 3 */ + else if (PyFloat_Check(key)) { + return encoder_encode_float(s, key); + } + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + return _encoded_const(key); + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + if (!(PyInt_CheckExact(key) || PyLong_CheckExact(key))) { + /* See #118, do not trust custom str/repr */ + PyObject *res; + PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, key, NULL); + if (tmp == NULL) { + return NULL; + } + res = PyObject_Str(tmp); + Py_DECREF(tmp); + return res; + } + else { + return PyObject_Str(key); + } + } + else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) { + return PyObject_Str(key); + } + if (s->skipkeys) { + 
Py_INCREF(Py_None); + return Py_None; + } + PyErr_Format(PyExc_TypeError, + "keys must be str, int, float, bool or None, " + "not %.100s", key->ob_type->tp_name); + return NULL; +} + +static PyObject * +encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct) +{ + PyObject *items; + PyObject *iter = NULL; + PyObject *lst = NULL; + PyObject *item = NULL; + PyObject *kstr = NULL; + PyObject *sortfun = NULL; + PyObject *sortres; + static PyObject *sortargs = NULL; + + if (sortargs == NULL) { + sortargs = PyTuple_New(0); + if (sortargs == NULL) + return NULL; + } + + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + if (items == NULL) + return NULL; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) + return NULL; + if (s->item_sort_kw == Py_None) + return iter; + lst = PyList_New(0); + if (lst == NULL) + goto bail; + while ((item = PyIter_Next(iter))) { + PyObject *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(key)) { + /* item can be added as-is */ + } +#endif /* PY_MAJOR_VERSION < 3 */ + else if (PyUnicode_Check(key)) { + /* item can be added as-is */ + } + else { + PyObject *tpl; + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(kstr); + continue; + } + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + tpl = PyTuple_Pack(2, kstr, value); + if (tpl == NULL) + goto bail; + Py_CLEAR(kstr); + Py_DECREF(item); + item = tpl; + } + if (PyList_Append(lst, item)) + goto bail; + Py_DECREF(item); + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + sortfun = PyObject_GetAttrString(lst, "sort"); + if (sortfun == NULL) + goto bail; + sortres = PyObject_Call(sortfun, sortargs, 
s->item_sort_kw); + if (!sortres) + goto bail; + Py_DECREF(sortres); + Py_CLEAR(sortfun); + iter = PyObject_GetIter(lst); + Py_CLEAR(lst); + return iter; +bail: + Py_XDECREF(sortfun); + Py_XDECREF(kstr); + Py_XDECREF(item); + Py_XDECREF(lst); + Py_XDECREF(iter); + return NULL; +} + +/* Use JSONDecodeError exception to raise a nice looking ValueError subclass */ +static PyObject *JSONDecodeError = NULL; +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + PyObject *exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (exc) { + PyErr_SetObject(JSONDecodeError, exc); + Py_DECREF(exc); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + return PyUnicode_Join(JSON_EmptyUnicode, lst); +} + +#if PY_MAJOR_VERSION >= 3 +#define join_list_string join_list_unicode +#else /* PY_MAJOR_VERSION >= 3 */ +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + joinfn = PyObject_GetAttrString(JSON_EmptyStr, "join"); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) +{ + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + assert(PyErr_Occurred()); + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if (pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +#define APPEND_OLD_CHUNK \ + if (chunk != NULL) { \ + if (chunks == NULL) { \ + chunks = PyList_New(0); \ + if (chunks == NULL) { \ + goto bail; \ + } \ + } \ 
+ if (PyList_Append(chunks, chunk)) { \ + goto bail; \ + } \ + Py_CLEAR(chunk); \ + } + +#if PY_MAJOR_VERSION < 3 +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = NULL; + PyObject *chunk = NULL; + PyObject *strchunk = NULL; + + if (len == end) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + else if (end < 0 || len < end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg(ERR_STRING_CONTROL, pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + APPEND_OLD_CHUNK + strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + } + next++; + if (c == '"') { + end = next; + 
break; + } + if (next == len) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + JSON_UNICHR digit = (JSON_UNICHR)buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } +#if defined(Py_UNICODE_WIDE) + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') { + JSON_UNICHR c2 = 0; + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + c2 <<= 4; + JSON_UNICHR digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + 
end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + } + } +#endif /* Py_UNICODE_WIDE */ + } + if (c > 0x7f) { + has_unicode = 1; + } + APPEND_OLD_CHUNK + if (has_unicode) { + chunk = PyUnicode_FromOrdinal(c); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + } + + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else { + rval = JSON_EmptyStr; + Py_INCREF(rval); + } + } + else { + APPEND_OLD_CHUNK + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + } + + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunk); + Py_XDECREF(chunks); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. 
+ if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + Py_ssize_t len = PyUnicode_GET_LENGTH(pystr); + void *buf = PyUnicode_DATA(pystr); + PyObject *chunks = NULL; + PyObject *chunk = NULL; + + if (len == end) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + else if (end < 0 || len < end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + JSON_UNICHR c = 0; + for (next = end; next < len; next++) { + c = PyUnicode_READ(kind, buf, next); + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg(ERR_STRING_CONTROL, pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION < 3 + chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end); +#else + chunk = PyUnicode_Substring(pystr, end, next); +#endif + if (chunk == NULL) { + goto bail; + } + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + c = PyUnicode_READ(kind, buf, next); + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; 
+ end = next + 4; + if (end >= len) { + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + JSON_UNICHR c2 = 0; + if (end + 6 < len && + PyUnicode_READ(kind, buf, next) == '\\' && + PyUnicode_READ(kind, buf, next + 1) == 'u') { + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); + c2 <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + } + } +#endif + } + APPEND_OLD_CHUNK + chunk = PyUnicode_FromOrdinal(c); + if (chunk == NULL) { + goto bail; + } + } + + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else { + rval = JSON_EmptyUnicode; + Py_INCREF(rval); + } + } + else { + APPEND_OLD_CHUNK + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; 
+ } + Py_CLEAR(chunks); + } + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunk); + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyUnicode_Check(pystr)) { + if (PyUnicode_READY(pystr)) + return NULL; + rval = scanstring_unicode(pystr, end, strict, &next_end); + } +#if PY_MAJOR_VERSION < 3 + /* Using a bytes input is unsupported for scanning in Python 3. + It is coerced to str in the decoder before it gets here. 
*/ + else if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } +#endif + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyBytes_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + if (PyUnicode_READY(pystr)) + return NULL; + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* bpo-31095: UnTrack is needed before calling any callbacks */ + PyObject_GC_UnTrack(self); + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict_bool); + Py_VISIT(s->object_hook); + Py_VISIT(s->pairs_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + Py_VISIT(s->memo); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict_bool); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + Py_CLEAR(s->memo); + return 0; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * 
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON object from PyString pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook or + object_pairs_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; + Py_ssize_t next_idx; + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + PyObject *memokey; + trailing_delimiter = 0; + + /* read key */ + if (str[idx] != '"') { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, s->strict, &next_idx); + if (key == NULL) + goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read 
any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + did_parse = 1; + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } + goto bail; + } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(rval); + Py_XDECREF(key); + Py_XDECREF(val); + Py_XDECREF(pairs); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + 
+static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; + PyObject *key = NULL; + PyObject *val = NULL; + int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; + Py_ssize_t next_idx; + + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + PyObject *memokey; + trailing_delimiter = 0; + + /* read key */ + if (PyUnicode_READ(kind, str, idx) != '"') { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx); + if (key == NULL) + goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip + whitespace */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, 
idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* bail if the object is closed or we didn't get the , + delimiter */ + did_parse = 1; + if (idx > end_idx) break; + if (PyUnicode_READ(kind, str, idx) == '}') { + break; + } + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } + goto bail; + } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; 
+ Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(rval); + Py_XDECREF(key); + Py_XDECREF(val); + Py_XDECREF(pairs); + return NULL; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + trailing_delimiter = 0; + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) { + goto bail; + } + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + if (PyList_GET_SIZE(rval)) { + 
raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + trailing_delimiter = 0; + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) { + goto bail; + } + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (PyUnicode_READ(kind, str, idx) == ']') { + break; + } + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + 
raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON constant from PyString pystr. + constant is the Python string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. + + Returns the result of parse_constant + */ + PyObject *rval; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, constant, NULL); + idx += PyString_GET_SIZE(constant); + *next_idx_ptr = idx; + return rval; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. 
+ May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + idx++; + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyString_FromStringAndSize(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */ + double d = PyOS_string_to_double(PyString_AS_STRING(numstr), + NULL, NULL); + if (d == -1.0 && PyErr_Occurred()) + return NULL; + rval = PyFloat_FromDouble(d); + } + } + else { + /* parse as an int using a fast path if available, otherwise call user defined method */ + if (s->parse_int != (PyObject *)&PyInt_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + else { + rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); + } + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON number from PyUnicode pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. 
+ May return other types if parse_int or parse_float are set + */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + JSON_UNICHR c; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (PyUnicode_READ(kind, str, idx) == '-') { + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + idx++; + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + c = PyUnicode_READ(kind, str, idx); + if (c == '0') { + /* if it starts with 0 we only expect one integer digit */ + idx++; + } + else if (IS_DIGIT(c)) { + idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) { + idx++; + } + } + else { + /* no integer digits, error */ + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && + PyUnicode_READ(kind, str, idx) == '.' && + IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) { + is_float = 1; + idx += 2; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == 'e' || + PyUnicode_READ(kind, str, idx) == 'E')) { + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == '-' || + PyUnicode_READ(kind, str, idx) == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */ + if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ +#if PY_MAJOR_VERSION >= 3 + numstr = PyUnicode_Substring(pystr, start, idx); +#else + numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start); +#endif + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { +#if PY_MAJOR_VERSION >= 3 + rval = PyFloat_FromString(numstr); +#else + rval = PyFloat_FromString(numstr, NULL); +#endif + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. 
+ */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; + if (idx < 0 || idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + rval = scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + s->strict, + next_idx_ptr); + break; + case '{': + /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a string")) + return NULL; + rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case '[': + /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a string")) + return NULL; + rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + rval = Py_None; + } + else + fallthrough = 1; + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + rval = Py_True; + } + else + fallthrough = 1; + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + rval = Py_False; + } + else + fallthrough = 1; + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + rval = _parse_constant(s, 
JSON_Infinity, idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + default: + fallthrough = 1; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + if (fallthrough) + rval = _match_number_str(s, pystr, idx, next_idx_ptr); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. 
+ */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t length = PyUnicode_GET_LENGTH(pystr); + PyObject *rval = NULL; + int fallthrough = 0; + if (idx < 0 || idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + switch (PyUnicode_READ(kind, str, idx)) { + case '"': + /* string */ + rval = scanstring_unicode(pystr, idx + 1, + s->strict, + next_idx_ptr); + break; + case '{': + /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a unicode string")) + return NULL; + rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case '[': + /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a unicode string")) + return NULL; + rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case 'n': + /* null */ + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'u' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + rval = Py_None; + } + else + fallthrough = 1; + break; + case 't': + /* true */ + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'r' && + PyUnicode_READ(kind, str, idx + 2) == 'u' && + PyUnicode_READ(kind, str, idx + 3) == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + rval = Py_True; + } + else + fallthrough = 1; + break; + case 'f': + /* false */ + if ((idx + 4 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 's' && + PyUnicode_READ(kind, str, idx + 4) == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + rval = Py_False; + } + else + fallthrough = 1; + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'N') 
{ + rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'n' && + PyUnicode_READ(kind, str, idx + 2) == 'f' && + PyUnicode_READ(kind, str, idx + 3) == 'i' && + PyUnicode_READ(kind, str, idx + 4) == 'n' && + PyUnicode_READ(kind, str, idx + 5) == 'i' && + PyUnicode_READ(kind, str, idx + 6) == 't' && + PyUnicode_READ(kind, str, idx + 7) == 'y') { + rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'I' && + PyUnicode_READ(kind, str, idx + 2) == 'n' && + PyUnicode_READ(kind, str, idx + 3) == 'f' && + PyUnicode_READ(kind, str, idx + 4) == 'i' && + PyUnicode_READ(kind, str, idx + 5) == 'n' && + PyUnicode_READ(kind, str, idx + 6) == 'i' && + PyUnicode_READ(kind, str, idx + 7) == 't' && + PyUnicode_READ(kind, str, idx + 8) == 'y') { + rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + default: + fallthrough = 1; + } + /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ + if (fallthrough) + rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); + return rval; +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyUnicode_Check(pystr)) { + if (PyUnicode_READY(pystr)) + return NULL; + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } +#endif /* PY_MAJOR_VERSION < 3 */ + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + PyDict_Clear(s->memo); + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +JSON_ParseEncoding(PyObject *encoding) +{ + if (encoding == Py_None) + return JSON_InternFromString(DEFAULT_ENCODING); +#if PY_MAJOR_VERSION >= 3 + if (PyUnicode_Check(encoding)) { + if (PyUnicode_AsUTF8(encoding) == NULL) { + return NULL; + } + Py_INCREF(encoding); + return encoding; + } +#else /* PY_MAJOR_VERSION >= 3 */ + if (PyString_Check(encoding)) { + Py_INCREF(encoding); + return encoding; + } + if (PyUnicode_Check(encoding)) + return PyUnicode_AsEncodedString(encoding, NULL, NULL); +#endif /* PY_MAJOR_VERSION >= 3 */ + PyErr_SetString(PyExc_TypeError, "encoding must be a string"); + return NULL; +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + PyObject *encoding; + + if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return NULL; + + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s == NULL) + return NULL; + + if (s->memo == NULL) { + s->memo = PyDict_New(); + if (s->memo == NULL) + goto bail; + } + + encoding = PyObject_GetAttrString(ctx, "encoding"); + if (encoding == NULL) + goto bail; + s->encoding = JSON_ParseEncoding(encoding); + Py_XDECREF(encoding); + if (s->encoding == NULL) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict_bool = PyObject_GetAttrString(ctx, "strict"); + if (s->strict_bool == NULL) + goto bail; + s->strict = PyObject_IsTrue(s->strict_bool); + if (s->strict < 0) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); + if (s->pairs_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return (PyObject *)s; + +bail: + Py_DECREF(s); + return NULL; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyVarObject_HEAD_INIT(NULL, 0) + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 0, /* tp_as_buffer */ + 
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = { + "markers", + "default", + "encoder", + "indent", + "key_separator", + "item_separator", + "sort_keys", + "skipkeys", + "allow_nan", + "key_memo", + "use_decimal", + "namedtuple_as_object", + "tuple_as_array", + "int_as_string_bitcount", + "item_sort_key", + "encoding", + "for_json", + "ignore_nan", + "Decimal", + "iterable_as_array", + NULL}; + + PyEncoderObject *s; + PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; + PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; + PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array, *iterable_as_array; + PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json; + PyObject *ignore_nan, *Decimal; + int is_true; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, + &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, + &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, + &namedtuple_as_object, &tuple_as_array, + &int_as_string_bitcount, &item_sort_key, &encoding, &for_json, + &ignore_nan, &Decimal, &iterable_as_array)) + return NULL; + + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s == NULL) + return NULL; + + Py_INCREF(markers); + s->markers = markers; + Py_INCREF(defaultfn); + s->defaultfn = 
defaultfn; + Py_INCREF(encoder); + s->encoder = encoder; +#if PY_MAJOR_VERSION >= 3 + if (encoding == Py_None) { + s->encoding = NULL; + } + else +#endif /* PY_MAJOR_VERSION >= 3 */ + { + s->encoding = JSON_ParseEncoding(encoding); + if (s->encoding == NULL) + goto bail; + } + Py_INCREF(indent); + s->indent = indent; + Py_INCREF(key_separator); + s->key_separator = key_separator; + Py_INCREF(item_separator); + s->item_separator = item_separator; + Py_INCREF(skipkeys); + s->skipkeys_bool = skipkeys; + s->skipkeys = PyObject_IsTrue(skipkeys); + if (s->skipkeys < 0) + goto bail; + Py_INCREF(key_memo); + s->key_memo = key_memo; + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + is_true = PyObject_IsTrue(ignore_nan); + if (is_true < 0) + goto bail; + s->allow_or_ignore_nan = is_true ? JSON_IGNORE_NAN : 0; + is_true = PyObject_IsTrue(allow_nan); + if (is_true < 0) + goto bail; + s->allow_or_ignore_nan |= is_true ? 
JSON_ALLOW_NAN : 0; + s->use_decimal = PyObject_IsTrue(use_decimal); + if (s->use_decimal < 0) + goto bail; + s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); + if (s->namedtuple_as_object < 0) + goto bail; + s->tuple_as_array = PyObject_IsTrue(tuple_as_array); + if (s->tuple_as_array < 0) + goto bail; + s->iterable_as_array = PyObject_IsTrue(iterable_as_array); + if (s->iterable_as_array < 0) + goto bail; + if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) { + static const unsigned long long_long_bitsize = SIZEOF_LONG_LONG * 8; + long int_as_string_bitcount_val = PyLong_AsLong(int_as_string_bitcount); + if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < (long)long_long_bitsize) { + s->max_long_size = PyLong_FromUnsignedLongLong(1ULL << (int)int_as_string_bitcount_val); + s->min_long_size = PyLong_FromLongLong(-1LL << (int)int_as_string_bitcount_val); + if (s->min_long_size == NULL || s->max_long_size == NULL) { + goto bail; + } + } + else { + PyErr_Format(PyExc_TypeError, + "int_as_string_bitcount (%ld) must be greater than 0 and less than the number of bits of a `long long` type (%lu bits)", + int_as_string_bitcount_val, long_long_bitsize); + goto bail; + } + } + else if (int_as_string_bitcount == Py_None) { + Py_INCREF(Py_None); + s->max_long_size = Py_None; + Py_INCREF(Py_None); + s->min_long_size = Py_None; + } + else { + PyErr_SetString(PyExc_TypeError, "int_as_string_bitcount must be None or an integer"); + goto bail; + } + if (item_sort_key != Py_None) { + if (!PyCallable_Check(item_sort_key)) { + PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); + goto bail; + } + } + else { + is_true = PyObject_IsTrue(sort_keys); + if (is_true < 0) + goto bail; + if (is_true) { + static PyObject *itemgetter0 = NULL; + if (!itemgetter0) { + PyObject *operator = PyImport_ImportModule("operator"); + if (!operator) + goto bail; + itemgetter0 = PyObject_CallMethod(operator, "itemgetter", 
"i", 0); + Py_DECREF(operator); + } + item_sort_key = itemgetter0; + if (!item_sort_key) + goto bail; + } + } + if (item_sort_key == Py_None) { + Py_INCREF(Py_None); + s->item_sort_kw = Py_None; + } + else { + s->item_sort_kw = PyDict_New(); + if (s->item_sort_kw == NULL) + goto bail; + if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) + goto bail; + } + Py_INCREF(sort_keys); + s->sort_keys = sort_keys; + Py_INCREF(item_sort_key); + s->item_sort_key = item_sort_key; + Py_INCREF(Decimal); + s->Decimal = Decimal; + s->for_json = PyObject_IsTrue(for_json); + if (s->for_json < 0) + goto bail; + + return (PyObject *)s; + +bail: + Py_DECREF(s); + return NULL; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + Py_ssize_t indent_level; + PyEncoderObject *s; + JSON_Accu rval; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + if (JSON_Accu_Init(&rval)) + return NULL; + if (encoder_listencode_obj(s, &rval, obj, indent_level)) { + JSON_Accu_Destroy(&rval); + return NULL; + } + return JSON_Accu_FinishAsList(&rval); +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = JSON_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = JSON_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = JSON_InternFromString("false"); + } + Py_INCREF(s_false); 
+ return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_or_ignore_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (s->allow_or_ignore_nan & JSON_IGNORE_NAN) { + return _encoded_const(Py_None); + } + /* JSON_ALLOW_NAN is set */ + else if (i > 0) { + Py_INCREF(JSON_Infinity); + return JSON_Infinity; + } + else if (i < 0) { + Py_INCREF(JSON_NegInfinity); + return JSON_NegInfinity; + } + else { + Py_INCREF(JSON_NaN); + return JSON_NaN; + } + } + /* Use a better float format here? */ + if (PyFloat_CheckExact(obj)) { + return PyObject_Repr(obj); + } + else { + /* See #118, do not trust custom str/repr */ + PyObject *res; + PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyFloat_Type, obj, NULL); + if (tmp == NULL) { + return NULL; + } + res = PyObject_Repr(tmp); + Py_DECREF(tmp); + return res; + } +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + PyObject *encoded; + + if (s->fast_encode) { + return py_encode_basestring_ascii(NULL, obj); + } + encoded = PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); + if (encoded != NULL && +#if PY_MAJOR_VERSION < 3 + !PyString_Check(encoded) && +#endif /* PY_MAJOR_VERSION < 3 */ + !PyUnicode_Check(encoded)) + { + PyErr_Format(PyExc_TypeError, + "encoder() must return a string, not %.80s", + Py_TYPE(encoded)->tp_name); + Py_DECREF(encoded); + return NULL; + } + return encoded; +} + +static int +_steal_accumulate(JSON_Accu *accu, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = JSON_Accu_Accumulate(accu, stolen); + Py_DECREF(stolen); + return rval; +} + +static int 
/* Encode the Python object ``obj`` as JSON text, appending the encoded
 * pieces to the accumulator ``rval``.  Returns 0 on success, -1 on error.
 * (The ``static int`` return type is on the preceding line.)
 * Dispatches on the concrete type of ``obj``; containers recurse with
 * Py_EnterRecursiveCall protection, and unknown types are routed through
 * the user-supplied ``default`` callable (s->defaultfn). */
encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level)
{
    /* Encode Python object obj to a JSON term, rval is a PyList */
    int rv = -1;
    /* do/while(0): error paths 'break' to the single exit point below. */
    do {
        if (obj == Py_None || obj == Py_True || obj == Py_False) {
            /* Singletons map directly to "null"/"true"/"false". */
            PyObject *cstr = _encoded_const(obj);
            if (cstr != NULL)
                rv = _steal_accumulate(rval, cstr);
        }
        else if ((PyBytes_Check(obj) && s->encoding != NULL) ||
                 PyUnicode_Check(obj))
        {
            /* str/unicode (and bytes when an encoding was configured). */
            PyObject *encoded = encoder_encode_string(s, obj);
            if (encoded != NULL)
                rv = _steal_accumulate(rval, encoded);
        }
        else if (PyInt_Check(obj) || PyLong_Check(obj)) {
            PyObject *encoded;
            if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) {
                encoded = PyObject_Str(obj);
            }
            else {
                /* See #118, do not trust custom str/repr */
                PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, obj, NULL);
                if (tmp == NULL) {
                    encoded = NULL;
                }
                else {
                    encoded = PyObject_Str(tmp);
                    Py_DECREF(tmp);
                }
            }
            if (encoded != NULL) {
                /* May wrap out-of-range ints in quotes (bigint_as_string). */
                encoded = maybe_quote_bigint(s, encoded, obj);
                if (encoded == NULL)
                    break;
                rv = _steal_accumulate(rval, encoded);
            }
        }
        else if (PyFloat_Check(obj)) {
            PyObject *encoded = encoder_encode_float(s, obj);
            if (encoded != NULL)
                rv = _steal_accumulate(rval, encoded);
        }
        else if (s->for_json && _has_for_json_hook(obj)) {
            /* Object supplies its own JSON form via a for_json() method. */
            PyObject *newobj;
            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
                return rv;
            newobj = PyObject_CallMethod(obj, "for_json", NULL);
            if (newobj != NULL) {
                rv = encoder_listencode_obj(s, rval, newobj, indent_level);
                Py_DECREF(newobj);
            }
            Py_LeaveRecursiveCall();
        }
        else if (s->namedtuple_as_object && _is_namedtuple(obj)) {
            /* namedtuples become JSON objects via their _asdict(). */
            PyObject *newobj;
            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
                return rv;
            newobj = PyObject_CallMethod(obj, "_asdict", NULL);
            if (newobj != NULL) {
                rv = encoder_listencode_dict(s, rval, newobj, indent_level);
                Py_DECREF(newobj);
            }
            Py_LeaveRecursiveCall();
        }
        else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
                return rv;
            rv = encoder_listencode_list(s, rval, obj, indent_level);
            Py_LeaveRecursiveCall();
        }
        else if (PyDict_Check(obj)) {
            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
                return rv;
            rv = encoder_listencode_dict(s, rval, obj, indent_level);
            Py_LeaveRecursiveCall();
        }
        else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) {
            /* Decimal is emitted as its bare str() form (no quotes). */
            PyObject *encoded = PyObject_Str(obj);
            if (encoded != NULL)
                rv = _steal_accumulate(rval, encoded);
        }
        else if (is_raw_json(obj))
        {
            /* RawJSON wrapper: pass its pre-encoded text through verbatim. */
            PyObject *encoded = PyObject_GetAttrString(obj, "encoded_json");
            if (encoded != NULL)
                rv = _steal_accumulate(rval, encoded);
        }
        else {
            /* Fallback path: optional iterable handling, circular-reference
             * tracking via s->markers, then the user default() hook. */
            PyObject *ident = NULL;
            PyObject *newobj;
            if (s->iterable_as_array) {
                newobj = PyObject_GetIter(obj);
                if (newobj == NULL)
                    /* Not iterable: clear the TypeError and fall through. */
                    PyErr_Clear();
                else {
                    rv = encoder_listencode_list(s, rval, newobj, indent_level);
                    Py_DECREF(newobj);
                    break;
                }
            }
            if (s->markers != Py_None) {
                int has_key;
                /* Key the markers dict by the object's address. */
                ident = PyLong_FromVoidPtr(obj);
                if (ident == NULL)
                    break;
                has_key = PyDict_Contains(s->markers, ident);
                if (has_key) {
                    /* has_key == -1 means an error is already set. */
                    if (has_key != -1)
                        PyErr_SetString(PyExc_ValueError, "Circular reference detected");
                    Py_DECREF(ident);
                    break;
                }
                if (PyDict_SetItem(s->markers, ident, obj)) {
                    Py_DECREF(ident);
                    break;
                }
            }
            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
                return rv;
            newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
            if (newobj == NULL) {
                Py_XDECREF(ident);
                Py_LeaveRecursiveCall();
                break;
            }
            rv = encoder_listencode_obj(s, rval, newobj, indent_level);
            Py_LeaveRecursiveCall();
            Py_DECREF(newobj);
            if (rv) {
                Py_XDECREF(ident);
                rv = -1;
            }
            else if (ident != NULL) {
                if (PyDict_DelItem(s->markers, ident)) {
                    /* NOTE(review): on DelItem failure ident appears to be
                     * decref'd here AND by the unconditional XDECREF below —
                     * looks like a double-DECREF on this error path; confirm
                     * against upstream before changing. */
                    Py_XDECREF(ident);
                    rv = -1;
                }
                Py_XDECREF(ident);
            }
        }
    } while (0);
    return rv;
}
+static int +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *iter = NULL; + PyObject *item = NULL; + PyObject *items = NULL; + PyObject *encoded = NULL; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = JSON_InternFromString("{"); + close_dict = JSON_InternFromString("}"); + empty_dict = JSON_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return JSON_Accu_Accumulate(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (JSON_Accu_Accumulate(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (_indent * _current_indent_level) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + iter = encoder_dict_iteritems(s, dct); + if (iter == NULL) + goto bail; + + idx = 0; + while ((item = PyIter_Next(iter))) { + PyObject *encoded, *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + + encoded = PyDict_GetItem(s->key_memo, key); + if (encoded != NULL) { + Py_INCREF(encoded); + } else { + 
kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(item); + Py_DECREF(kstr); + continue; + } + } + if (idx) { + if (JSON_Accu_Accumulate(rval, s->item_separator)) + goto bail; + } + if (encoded == NULL) { + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyDict_SetItem(s->key_memo, key, encoded)) + goto bail; + } + if (JSON_Accu_Accumulate(rval, encoded)) { + goto bail; + } + Py_CLEAR(encoded); + if (JSON_Accu_Accumulate(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + Py_CLEAR(item); + idx += 1; + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (_indent * _current_indent_level) + */ + } + if (JSON_Accu_Accumulate(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(encoded); + Py_XDECREF(items); + Py_XDECREF(item); + Py_XDECREF(iter); + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *iter = NULL; + PyObject *obj = NULL; + int is_true; + int i = 0; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = JSON_InternFromString("["); + close_array = JSON_InternFromString("]"); + empty_array = JSON_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + is_true = PyObject_IsTrue(seq); + if (is_true == -1) + return -1; + else if 
(is_true == 0) + return JSON_Accu_Accumulate(rval, empty_array); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + iter = PyObject_GetIter(seq); + if (iter == NULL) + goto bail; + + if (JSON_Accu_Accumulate(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (_indent * _current_indent_level) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + while ((obj = PyIter_Next(iter))) { + if (i) { + if (JSON_Accu_Accumulate(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + i++; + Py_CLEAR(obj); + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (_indent * _current_indent_level) + */ + } + if (JSON_Accu_Accumulate(rval, close_array)) + goto bail; + return 0; + +bail: + Py_XDECREF(obj); + Py_XDECREF(iter); + Py_XDECREF(ident); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* bpo-31095: UnTrack is needed before calling any callbacks */ + PyObject_GC_UnTrack(self); + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->encoding); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + 
Py_VISIT(s->key_memo); + Py_VISIT(s->sort_keys); + Py_VISIT(s->item_sort_kw); + Py_VISIT(s->item_sort_key); + Py_VISIT(s->max_long_size); + Py_VISIT(s->min_long_size); + Py_VISIT(s->Decimal); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->encoding); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->key_memo); + Py_CLEAR(s->skipkeys_bool); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->item_sort_kw); + Py_CLEAR(s->item_sort_key); + Py_CLEAR(s->max_long_size); + Py_CLEAR(s->min_long_size); + Py_CLEAR(s->Decimal); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyVarObject_HEAD_INIT(NULL, 0) + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + 
{"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_speedups", /* m_name */ + module_doc, /* m_doc */ + -1, /* m_size */ + speedups_methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear*/ + NULL, /* m_free */ +}; +#endif + +PyObject * +import_dependency(char *module_name, char *attr_name) +{ + PyObject *rval; + PyObject *module = PyImport_ImportModule(module_name); + if (module == NULL) + return NULL; + rval = PyObject_GetAttrString(module, attr_name); + Py_DECREF(module); + return rval; +} + +static int +init_constants(void) +{ + JSON_NaN = JSON_InternFromString("NaN"); + if (JSON_NaN == NULL) + return 0; + JSON_Infinity = JSON_InternFromString("Infinity"); + if (JSON_Infinity == NULL) + return 0; + JSON_NegInfinity = JSON_InternFromString("-Infinity"); + if (JSON_NegInfinity == NULL) + return 0; +#if PY_MAJOR_VERSION >= 3 + JSON_EmptyUnicode = PyUnicode_New(0, 127); +#else /* PY_MAJOR_VERSION >= 3 */ + JSON_EmptyStr = PyString_FromString(""); + if (JSON_EmptyStr == NULL) + return 0; + JSON_EmptyUnicode = PyUnicode_FromUnicode(NULL, 0); +#endif /* PY_MAJOR_VERSION >= 3 */ + if (JSON_EmptyUnicode == NULL) + return 0; + + return 1; +} + +static PyObject * +moduleinit(void) +{ + PyObject *m; + if (PyType_Ready(&PyScannerType) < 0) + return NULL; + if (PyType_Ready(&PyEncoderType) < 0) + return NULL; + if (!init_constants()) + return NULL; + +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule3("_speedups", speedups_methods, module_doc); +#endif + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + 
Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); + RawJSONType = import_dependency("simplejson.raw_json", "RawJSON"); + if (RawJSONType == NULL) + return NULL; + JSONDecodeError = import_dependency("simplejson.errors", "JSONDecodeError"); + if (JSONDecodeError == NULL) + return NULL; + return m; +} + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC +PyInit__speedups(void) +{ + return moduleinit(); +} +#else +void +init_speedups(void) +{ + moduleinit(); +} +#endif diff --git a/simplejson/compat.py b/simplejson/compat.py new file mode 100644 index 0000000..5fc1412 --- /dev/null +++ b/simplejson/compat.py @@ -0,0 +1,34 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + BytesIO = StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload +else: + PY3 = True + if sys.version_info[:2] >= (3, 4): + from importlib import reload as reload_module + else: + from imp import reload as reload_module + def b(s): + return bytes(s, 'latin1') + from io import StringIO, BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + unichr = chr + +long_type = integer_types[-1] diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 0000000..7f0b056 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,400 @@ +"""Implementation of JSONDecoder +""" +from __future__ import absolute_import +import re +import sys +import struct +from .compat import PY3, unichr +from .scanner import make_scanner, JSONDecodeError + +def _import_c_scanstring(): + try: + from ._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() + +# NOTE (3.1.0): JSONDecodeError may 
still be imported from this module for +# compatibility, but it was never in the __all__ +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + if sys.version_info < (2, 6): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + nan, inf = struct.unpack('>dd', _BYTES) + else: + nan = float('nan') + inf = float('inf') + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join, + _PY3=PY3, _maxunicode=sys.maxunicode): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
+ + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise JSONDecodeError( + "Unterminated string starting at", s, begin) + end = chunk.end() + content, terminator = chunk.groups() + # Content is contains zero or more unescaped string characters + if content: + if not _PY3 and not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows + if terminator == '"': + break + elif terminator != '\\': + if strict: + msg = "Invalid control character %r at" + raise JSONDecodeError(msg, s, end) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise JSONDecodeError( + "Unterminated string starting at", s, begin) + # If not a unicode escape sequence, must be in the lookup table + if esc != 'u': + try: + char = _b[esc] + except KeyError: + msg = "Invalid \\X escape sequence %r" + raise JSONDecodeError(msg, s, end) + end += 1 + else: + # Unicode escape sequence + msg = "Invalid \\uXXXX escape sequence" + esc = s[end + 1:end + 5] + escX = esc[1:2] + if len(esc) != 4 or escX == 'x' or escX == 'X': + raise JSONDecodeError(msg, s, end - 1) + try: + uni = int(esc, 16) + except ValueError: + raise JSONDecodeError(msg, s, end - 1) + end += 5 + # Check for surrogate pair on UCS-4 systems + # Note that this will join high/low surrogate pairs + # but will also pass unpaired surrogates through + if (_maxunicode > 65535 and + uni & 0xfc00 == 0xd800 and + s[end:end + 2] == '\\u'): + esc2 = s[end + 2:end + 6] + escX = esc2[1:2] + if len(esc2) == 4 and not (escX == 'x' or escX == 'X'): + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 
== 0xdc00: + uni = 0x10000 + (((uni - 0xd800) << 10) | + (uni2 - 0xdc00)) + end += 6 + char = unichr(uni) + # Append the unescaped character + _append(char) + return _join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + +def JSONObject(state, encoding, strict, scan_once, object_hook, + object_pairs_hook, memo=None, + _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state + # Backwards compatibility + if memo is None: + memo = {} + memo_get = memo.setdefault + pairs = [] + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty + nextchar = s[end:end + 1] + # Normally we expect nextchar == '"' + if nextchar != '"': + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + 1 + pairs = {} + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + 1 + elif nextchar != '"': + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end) + end += 1 + while True: + key, end = scanstring(s, end, encoding, strict) + key = memo_get(key, key) + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". 
        # ---- tail of JSONObject: parse "key : value" pairs until '}' ----
        # Expect the ':' separator; skip optional whitespace before it.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)

        end += 1

        # Fast path: skip at most one whitespace char inline before falling
        # back to the regex matcher; IndexError means end-of-input here and
        # is diagnosed by scan_once below.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        value, end = scan_once(s, end)
        pairs.append((key, value))

        # Consume whitespace, then the ',' or '}' that follows the value.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)

        # After the comma, skip whitespace up to the next key's quote.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end - 1)

    # object_pairs_hook takes priority over object_hook when both are set.
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end

def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array from ``state`` = (string, index-after-'[').

    Returns a 2-tuple of the decoded list and the index just past the
    closing ']'.  Raises JSONDecodeError on malformed input.
    """
    (s, end) = state
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    elif nextchar == '':
        raise JSONDecodeError("Expecting value or ']'", s, end)
    # Bind the bound method once; it is called on every element.
    _append = values.append
    while True:
        value, end = scan_once(s, end)
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)

        # Skip whitespace before the next element (same inline fast path
        # as JSONObject above).
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end

# (chunk ends mid-statement: the JSONDecoder class definition continues)
class
JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | str, unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + """ + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. 
This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + *strict* controls the parser's behavior when it encounters an + invalid control character in a string. The default setting of + ``True`` means that unescaped control characters are parse errors, if + ``False`` then control characters will be allowed in strings. 
        """
        # Default to UTF-8 when no encoding is given.
        if encoding is None:
            encoding = DEFAULT_ENCODING
        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Fall back to the builtin converters when no hook is supplied.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # memo de-duplicates repeated key strings while scanning.
        self.memo = {}
        # make_scanner reads the parse_* attributes set above, so it must
        # be constructed last.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document).

        Raises JSONDecodeError if anything other than whitespace follows
        the document.
        """
        if _PY3 and isinstance(s, bytes):
            # On Python 3, transparently decode bytes input first.
            s = str(s, self.encoding)
        obj, end = self.raw_decode(s)
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end, len(s))
        return obj

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.
        Optionally, ``idx`` can be used to specify an offset in ``s`` where
        the JSON document begins.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        if idx < 0:
            # Ensure that raw_decode bails on negative indexes, the regex
            # would otherwise mask this behavior.
#98 + raise JSONDecodeError('Expecting value', s, idx) + if _PY3 and not isinstance(s, str): + raise TypeError("Input string must be text, not bytes") + # strip UTF-8 bom + if len(s) > idx: + ord0 = ord(s[idx]) + if ord0 == 0xfeff: + idx += 1 + elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': + idx += 3 + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/simplejson/encoder.py b/simplejson/encoder.py new file mode 100644 index 0000000..7ea172e --- /dev/null +++ b/simplejson/encoder.py @@ -0,0 +1,722 @@ +"""Implementation of JSONEncoder +""" +from __future__ import absolute_import +import re +from operator import itemgetter +# Do not import Decimal directly to avoid reload issues +import decimal +from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3 +def _import_speedups(): + try: + from . import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() + +from .decoder import PosInf +from .raw_json import RawJSON + +ESCAPE = re.compile(r'[\x00-\x1f\\"]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +FLOAT_REPR = repr + +def encode_basestring(s, _PY3=PY3, _q=u'"'): + """Return a JSON representation of a Python string + + """ + if _PY3: + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convert an str subclass instance to exact str + # convert a 
unicode subclass instance to exact unicode + # raise a TypeError otherwise + if isinstance(s, str): + s = str.__str__(s) + else: + s = unicode.__getnewargs__(s)[0] + def replace(match): + return ESCAPE_DCT[match.group(0)] + return _q + ESCAPE.sub(replace, s) + _q + + +def py_encode_basestring_ascii(s, _PY3=PY3): + """Return an ASCII-only JSON representation of a Python string + + """ + if _PY3: + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise + if isinstance(s, str): + s = str.__str__(s) + else: + s = unicode.__getnewargs__(s)[0] + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + #return '\\u{0:04x}'.format(n) + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) + +class JSONEncoder(object): + """Extensible JSON encoder for Python data structures. 
+ + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict, namedtuple | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + + """ + item_separator = ', ' + key_separator = ': ' + + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None, iterable_as_array=False): + """Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is false, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is true, the output is guaranteed to be str + objects with all incoming unicode characters escaped. If + ensure_ascii is false, the output will be unicode object. + + If check_circular is true, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. 
+ + If allow_nan is true, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is true, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. + + If specified, separators should be an (item_separator, key_separator) + tuple. The default is (', ', ': ') if *indent* is ``None`` and + (',', ': ') otherwise. To get the most compact JSON representation, + you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. + + If use_decimal is true (default: ``True``), ``decimal.Decimal`` will + be supported directly by the encoder. For the inverse, decode JSON + with ``parse_float=decimal.Decimal``. + + If namedtuple_as_object is true (the default), objects with + ``_asdict()`` methods will be encoded as JSON objects. + + If tuple_as_array is true (the default), tuple (and subclasses) will + be encoded as JSON arrays. 
+ + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If int_as_string_bitcount is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, item_sort_key is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. + + If for_json is true (not the default), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized + as ``null`` in compliance with the ECMA-262 specification. If true, + this will override *allow_nan*. 
+ + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.use_decimal = use_decimal + self.namedtuple_as_object = namedtuple_as_object + self.tuple_as_array = tuple_as_array + self.iterable_as_array = iterable_as_array + self.bigint_as_string = bigint_as_string + self.item_sort_key = item_sort_key + self.for_json = for_json + self.ignore_nan = ignore_nan + self.int_as_string_bitcount = int_as_string_bitcount + if indent is not None and not isinstance(indent, string_types): + indent = indent * ' ' + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' + if default is not None: + self.default = default + self.encoding = encoding + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError('Object of type %s is not JSON serializable' % + o.__class__.__name__) + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> from simplejson import JSONEncoder + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. 
+ if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = text_type(o, _encoding) + if isinstance(o, string_types): + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + if self.ensure_ascii: + return ''.join(chunks) + else: + return u''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8' and self.encoding is not None: + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, binary_type): + o = text_type(o, _encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, + _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on + # the internals. 
+ + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + if type(o) != float: + # See #118, do not trust custom str/repr + o = float(o) + return _repr(o) + + if ignore_nan: + text = 'null' + elif not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + key_memo = {} + int_as_string_bitcount = ( + 53 if self.bigint_as_string else self.int_as_string_bitcount) + if (_one_shot and c_make_encoder is not None + and self.indent is None): + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan, key_memo, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.ignore_nan, decimal.Decimal, self.iterable_as_array) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.iterable_as_array, Decimal=decimal.Decimal) + try: + return _iterencode(o, 0) + finally: + key_memo.clear() + + +class JSONEncoderForHTML(JSONEncoder): + """An encoder that produces JSON safe to embed in HTML. + + To embed JSON content in, say, a script tag on a web page, the + characters &, < and > should be escaped. They cannot be escaped + with the usual entities (e.g. 
&) because they are not expanded + within ' + self.assertEqual( + r'"\u003c/script\u003e\u003cscript\u003e' + r'alert(\"gotcha\")\u003c/script\u003e"', + self.encoder.encode(bad_string)) + self.assertEqual( + bad_string, self.decoder.decode( + self.encoder.encode(bad_string))) diff --git a/simplejson/tests/test_errors.py b/simplejson/tests/test_errors.py new file mode 100644 index 0000000..d573825 --- /dev/null +++ b/simplejson/tests/test_errors.py @@ -0,0 +1,68 @@ +import sys, pickle +from unittest import TestCase + +import simplejson as json +from simplejson.compat import text_type, b + +class TestErrors(TestCase): + def test_string_keys_error(self): + data = [{'a': 'A', 'b': (2, 4), 'c': 3.0, ('d',): 'D tuple'}] + try: + json.dumps(data) + except TypeError: + err = sys.exc_info()[1] + else: + self.fail('Expected TypeError') + self.assertEqual(str(err), + 'keys must be str, int, float, bool or None, not tuple') + + def test_not_serializable(self): + try: + json.dumps(json) + except TypeError: + err = sys.exc_info()[1] + else: + self.fail('Expected TypeError') + self.assertEqual(str(err), + 'Object of type module is not JSON serializable') + + def test_decode_error(self): + err = None + try: + json.loads('{}\na\nb') + except json.JSONDecodeError: + err = sys.exc_info()[1] + else: + self.fail('Expected JSONDecodeError') + self.assertEqual(err.lineno, 2) + self.assertEqual(err.colno, 1) + self.assertEqual(err.endlineno, 3) + self.assertEqual(err.endcolno, 2) + + def test_scan_error(self): + err = None + for t in (text_type, b): + try: + json.loads(t('{"asdf": "')) + except json.JSONDecodeError: + err = sys.exc_info()[1] + else: + self.fail('Expected JSONDecodeError') + self.assertEqual(err.lineno, 1) + self.assertEqual(err.colno, 10) + + def test_error_is_pickable(self): + err = None + try: + json.loads('{}\na\nb') + except json.JSONDecodeError: + err = sys.exc_info()[1] + else: + self.fail('Expected JSONDecodeError') + s = pickle.dumps(err) + e = pickle.loads(s) + 
+ self.assertEqual(err.msg, e.msg) + self.assertEqual(err.doc, e.doc) + self.assertEqual(err.pos, e.pos) + self.assertEqual(err.end, e.end) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py new file mode 100644 index 0000000..788f3a5 --- /dev/null +++ b/simplejson/tests/test_fail.py @@ -0,0 +1,176 @@ +import sys +from unittest import TestCase + +import simplejson as json + +# 2007-10-05 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted"}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '[\\naked]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # 
http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://json.org/JSON_checker/test/fail25.json + '["\ttab\tcharacter\tin\tstring\t"]', + # http://json.org/JSON_checker/test/fail26.json + '["tab\\ character\\ in\\ string\\ "]', + # http://json.org/JSON_checker/test/fail27.json + '["line\nbreak"]', + # http://json.org/JSON_checker/test/fail28.json + '["line\\\nbreak"]', + # http://json.org/JSON_checker/test/fail29.json + '[0e]', + # http://json.org/JSON_checker/test/fail30.json + '[0e+]', + # http://json.org/JSON_checker/test/fail31.json + '[0e+-1]', + # http://json.org/JSON_checker/test/fail32.json + '{"Comma instead if closing brace": true,', + # http://json.org/JSON_checker/test/fail33.json + '["mismatch"}', + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', + # misc based on coverage + '{', + '{]', + '{"foo": "bar"]', + '{"foo": "bar"', + 'nul', + 'nulx', + '-', + '-x', + '-e', + '-e0', + '-Infinite', + '-Inf', + 'Infinit', + 'Infinite', + 'NaM', + 'NuN', + 'falsy', + 'fal', + 'trug', + 'tru', + '1e', + '1ex', + '1e-', + '1e-x', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except json.JSONDecodeError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) + + def test_array_decoder_issue46(self): + # 
http://code.google.com/p/simplejson/issues/detail?id=46 + for doc in [u'[,]', '[,]']: + try: + json.loads(doc) + except json.JSONDecodeError: + e = sys.exc_info()[1] + self.assertEqual(e.pos, 1) + self.assertEqual(e.lineno, 1) + self.assertEqual(e.colno, 2) + except Exception: + e = sys.exc_info()[1] + self.fail("Unexpected exception raised %r %s" % (e, e)) + else: + self.fail("Unexpected success parsing '[,]'") + + def test_truncated_input(self): + test_cases = [ + ('', 'Expecting value', 0), + ('[', "Expecting value or ']'", 1), + ('[42', "Expecting ',' delimiter", 3), + ('[42,', 'Expecting value', 4), + ('["', 'Unterminated string starting at', 1), + ('["spam', 'Unterminated string starting at', 1), + ('["spam"', "Expecting ',' delimiter", 7), + ('["spam",', 'Expecting value', 8), + ('{', 'Expecting property name enclosed in double quotes', 1), + ('{"', 'Unterminated string starting at', 1), + ('{"spam', 'Unterminated string starting at', 1), + ('{"spam"', "Expecting ':' delimiter", 7), + ('{"spam":', 'Expecting value', 8), + ('{"spam":42', "Expecting ',' delimiter", 10), + ('{"spam":42,', 'Expecting property name enclosed in double quotes', + 11), + ('"', 'Unterminated string starting at', 0), + ('"spam', 'Unterminated string starting at', 0), + ('[,', "Expecting value", 1), + ] + for data, msg, idx in test_cases: + try: + json.loads(data) + except json.JSONDecodeError: + e = sys.exc_info()[1] + self.assertEqual( + e.msg[:len(msg)], + msg, + "%r doesn't start with %r for %r" % (e.msg, msg, data)) + self.assertEqual( + e.pos, idx, + "pos %r != %r for %r" % (e.pos, idx, data)) + except Exception: + e = sys.exc_info()[1] + self.fail("Unexpected exception raised %r %s" % (e, e)) + else: + self.fail("Unexpected success parsing '%r'" % (data,)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 0000000..e382ec2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,35 @@ +import math +from unittest import 
TestCase +from simplejson.compat import long_type, text_type +import simplejson as json +from simplejson.decoder import NaN, PosInf, NegInf + +class TestFloat(TestCase): + def test_degenerates_allow(self): + for inf in (PosInf, NegInf): + self.assertEqual(json.loads(json.dumps(inf)), inf) + # Python 2.5 doesn't have math.isnan + nan = json.loads(json.dumps(NaN)) + self.assertTrue((0 + nan) != nan) + + def test_degenerates_ignore(self): + for f in (PosInf, NegInf, NaN): + self.assertEqual(json.loads(json.dumps(f, ignore_nan=True)), None) + + def test_degenerates_deny(self): + for f in (PosInf, NegInf, NaN): + self.assertRaises(ValueError, json.dumps, f, allow_nan=False) + + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, + math.pi**-100, 3.1]: + self.assertEqual(float(json.dumps(num)), num) + self.assertEqual(json.loads(json.dumps(num)), num) + self.assertEqual(json.loads(text_type(json.dumps(num))), num) + + def test_ints(self): + for num in [1, long_type(1), 1<<32, 1<<64]: + self.assertEqual(json.dumps(num), str(num)) + self.assertEqual(int(json.dumps(num)), num) + self.assertEqual(json.loads(json.dumps(num)), num) + self.assertEqual(json.loads(text_type(json.dumps(num))), num) diff --git a/simplejson/tests/test_for_json.py b/simplejson/tests/test_for_json.py new file mode 100644 index 0000000..b791b88 --- /dev/null +++ b/simplejson/tests/test_for_json.py @@ -0,0 +1,97 @@ +import unittest +import simplejson as json + + +class ForJson(object): + def for_json(self): + return {'for_json': 1} + + +class NestedForJson(object): + def for_json(self): + return {'nested': ForJson()} + + +class ForJsonList(object): + def for_json(self): + return ['list'] + + +class DictForJson(dict): + def for_json(self): + return {'alpha': 1} + + +class ListForJson(list): + def for_json(self): + return ['list'] + + +class TestForJson(unittest.TestCase): + def assertRoundTrip(self, obj, other, for_json=True): + if for_json is None: + # None will use the 
default + s = json.dumps(obj) + else: + s = json.dumps(obj, for_json=for_json) + self.assertEqual( + json.loads(s), + other) + + def test_for_json_encodes_stand_alone_object(self): + self.assertRoundTrip( + ForJson(), + ForJson().for_json()) + + def test_for_json_encodes_object_nested_in_dict(self): + self.assertRoundTrip( + {'hooray': ForJson()}, + {'hooray': ForJson().for_json()}) + + def test_for_json_encodes_object_nested_in_list_within_dict(self): + self.assertRoundTrip( + {'list': [0, ForJson(), 2, 3]}, + {'list': [0, ForJson().for_json(), 2, 3]}) + + def test_for_json_encodes_object_nested_within_object(self): + self.assertRoundTrip( + NestedForJson(), + {'nested': {'for_json': 1}}) + + def test_for_json_encodes_list(self): + self.assertRoundTrip( + ForJsonList(), + ForJsonList().for_json()) + + def test_for_json_encodes_list_within_object(self): + self.assertRoundTrip( + {'nested': ForJsonList()}, + {'nested': ForJsonList().for_json()}) + + def test_for_json_encodes_dict_subclass(self): + self.assertRoundTrip( + DictForJson(a=1), + DictForJson(a=1).for_json()) + + def test_for_json_encodes_list_subclass(self): + self.assertRoundTrip( + ListForJson(['l']), + ListForJson(['l']).for_json()) + + def test_for_json_ignored_if_not_true_with_dict_subclass(self): + for for_json in (None, False): + self.assertRoundTrip( + DictForJson(a=1), + {'a': 1}, + for_json=for_json) + + def test_for_json_ignored_if_not_true_with_list_subclass(self): + for for_json in (None, False): + self.assertRoundTrip( + ListForJson(['l']), + ['l'], + for_json=for_json) + + def test_raises_typeerror_if_for_json_not_true_with_object(self): + self.assertRaises(TypeError, json.dumps, ForJson()) + self.assertRaises(TypeError, json.dumps, ForJson(), for_json=False) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 0000000..cea25a5 --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,86 @@ +from unittest import TestCase +import 
textwrap + +import simplejson as json +from simplejson.compat import StringIO + +class TestIndent(TestCase): + def test_indent(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', + 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + \t[ + \t\t"blorpie" + \t], + \t[ + \t\t"whoops" + \t], + \t[], + \t"d-shtaeou", + \t"d-nthiouh", + \t"i-vhbjkhnth", + \t{ + \t\t"nifty": 87 + \t}, + \t{ + \t\t"field": "yes", + \t\t"morefield": false + \t} + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent='\t', sort_keys=True, separators=(',', ': ')) + d3 = json.dumps(h, indent=' ', sort_keys=True, separators=(',', ': ')) + d4 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + h3 = json.loads(d3) + h4 = json.loads(d4) + + self.assertEqual(h1, h) + self.assertEqual(h2, h) + self.assertEqual(h3, h) + self.assertEqual(h4, h) + self.assertEqual(d3, expect.replace('\t', ' ')) + self.assertEqual(d4, expect.replace('\t', ' ')) + # NOTE: Python 2.4 textwrap.dedent converts tabs to spaces, + # so the following is expected to fail. Python 2.4 is not a + # supported platform in simplejson 2.1.0+. 
+ self.assertEqual(d2, expect) + + def test_indent0(self): + h = {3: 1} + def check(indent, expected): + d1 = json.dumps(h, indent=indent) + self.assertEqual(d1, expected) + + sio = StringIO() + json.dump(h, sio, indent=indent) + self.assertEqual(sio.getvalue(), expected) + + # indent=0 should emit newlines + check(0, '{\n"3": 1\n}') + # indent=None is more compact + check(None, '{"3": 1}') + + def test_separators(self): + lst = [1,2,3,4] + expect = '[\n1,\n2,\n3,\n4\n]' + expect_spaces = '[\n1, \n2, \n3, \n4\n]' + # Ensure that separators still works + self.assertEqual( + expect_spaces, + json.dumps(lst, indent=0, separators=(', ', ': '))) + # Force the new defaults + self.assertEqual( + expect, + json.dumps(lst, indent=0, separators=(',', ': '))) + # Added in 2.1.4 + self.assertEqual( + expect, + json.dumps(lst, indent=0)) diff --git a/simplejson/tests/test_item_sort_key.py b/simplejson/tests/test_item_sort_key.py new file mode 100644 index 0000000..98971b8 --- /dev/null +++ b/simplejson/tests/test_item_sort_key.py @@ -0,0 +1,27 @@ +from unittest import TestCase + +import simplejson as json +from operator import itemgetter + +class TestItemSortKey(TestCase): + def test_simple_first(self): + a = {'a': 1, 'c': 5, 'jack': 'jill', 'pick': 'axe', 'array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'} + self.assertEqual( + '{"a": 1, "c": 5, "crate": "dog", "jack": "jill", "pick": "axe", "zeak": "oh", "array": [1, 5, 6, 9], "tuple": [83, 12, 3]}', + json.dumps(a, item_sort_key=json.simple_first)) + + def test_case(self): + a = {'a': 1, 'c': 5, 'Jack': 'jill', 'pick': 'axe', 'Array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'} + self.assertEqual( + '{"Array": [1, 5, 6, 9], "Jack": "jill", "a": 1, "c": 5, "crate": "dog", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}', + json.dumps(a, item_sort_key=itemgetter(0))) + self.assertEqual( + '{"a": 1, "Array": [1, 5, 6, 9], "c": 5, "crate": "dog", "Jack": "jill", "pick": "axe", 
"tuple": [83, 12, 3], "zeak": "oh"}', + json.dumps(a, item_sort_key=lambda kv: kv[0].lower())) + + def test_item_sort_key_value(self): + # https://github.com/simplejson/simplejson/issues/173 + a = {'a': 1, 'b': 0} + self.assertEqual( + '{"b": 0, "a": 1}', + json.dumps(a, item_sort_key=lambda kv: kv[1])) diff --git a/simplejson/tests/test_iterable.py b/simplejson/tests/test_iterable.py new file mode 100644 index 0000000..35d3e75 --- /dev/null +++ b/simplejson/tests/test_iterable.py @@ -0,0 +1,31 @@ +import unittest +from simplejson.compat import StringIO + +import simplejson as json + +def iter_dumps(obj, **kw): + return ''.join(json.JSONEncoder(**kw).iterencode(obj)) + +def sio_dump(obj, **kw): + sio = StringIO() + json.dumps(obj, **kw) + return sio.getvalue() + +class TestIterable(unittest.TestCase): + def test_iterable(self): + for l in ([], [1], [1, 2], [1, 2, 3]): + for opts in [{}, {'indent': 2}]: + for dumps in (json.dumps, iter_dumps, sio_dump): + expect = dumps(l, **opts) + default_expect = dumps(sum(l), **opts) + # Default is False + self.assertRaises(TypeError, dumps, iter(l), **opts) + self.assertRaises(TypeError, dumps, iter(l), iterable_as_array=False, **opts) + self.assertEqual(expect, dumps(iter(l), iterable_as_array=True, **opts)) + # Ensure that the "default" gets called + self.assertEqual(default_expect, dumps(iter(l), default=sum, **opts)) + self.assertEqual(default_expect, dumps(iter(l), iterable_as_array=False, default=sum, **opts)) + # Ensure that the "default" does not get called + self.assertEqual( + expect, + dumps(iter(l), iterable_as_array=True, default=sum, **opts)) diff --git a/simplejson/tests/test_namedtuple.py b/simplejson/tests/test_namedtuple.py new file mode 100644 index 0000000..4387894 --- /dev/null +++ b/simplejson/tests/test_namedtuple.py @@ -0,0 +1,122 @@ +from __future__ import absolute_import +import unittest +import simplejson as json +from simplejson.compat import StringIO + +try: + from collections import namedtuple 
+except ImportError: + class Value(tuple): + def __new__(cls, *args): + return tuple.__new__(cls, args) + + def _asdict(self): + return {'value': self[0]} + class Point(tuple): + def __new__(cls, *args): + return tuple.__new__(cls, args) + + def _asdict(self): + return {'x': self[0], 'y': self[1]} +else: + Value = namedtuple('Value', ['value']) + Point = namedtuple('Point', ['x', 'y']) + +class DuckValue(object): + def __init__(self, *args): + self.value = Value(*args) + + def _asdict(self): + return self.value._asdict() + +class DuckPoint(object): + def __init__(self, *args): + self.point = Point(*args) + + def _asdict(self): + return self.point._asdict() + +class DeadDuck(object): + _asdict = None + +class DeadDict(dict): + _asdict = None + +CONSTRUCTORS = [ + lambda v: v, + lambda v: [v], + lambda v: [{'key': v}], +] + +class TestNamedTuple(unittest.TestCase): + def test_namedtuple_dumps(self): + for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]: + d = v._asdict() + self.assertEqual(d, json.loads(json.dumps(v))) + self.assertEqual( + d, + json.loads(json.dumps(v, namedtuple_as_object=True))) + self.assertEqual(d, json.loads(json.dumps(v, tuple_as_array=False))) + self.assertEqual( + d, + json.loads(json.dumps(v, namedtuple_as_object=True, + tuple_as_array=False))) + + def test_namedtuple_dumps_false(self): + for v in [Value(1), Point(1, 2)]: + l = list(v) + self.assertEqual( + l, + json.loads(json.dumps(v, namedtuple_as_object=False))) + self.assertRaises(TypeError, json.dumps, v, + tuple_as_array=False, namedtuple_as_object=False) + + def test_namedtuple_dump(self): + for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]: + d = v._asdict() + sio = StringIO() + json.dump(v, sio) + self.assertEqual(d, json.loads(sio.getvalue())) + sio = StringIO() + json.dump(v, sio, namedtuple_as_object=True) + self.assertEqual( + d, + json.loads(sio.getvalue())) + sio = StringIO() + json.dump(v, sio, tuple_as_array=False) + self.assertEqual(d, 
json.loads(sio.getvalue())) + sio = StringIO() + json.dump(v, sio, namedtuple_as_object=True, + tuple_as_array=False) + self.assertEqual( + d, + json.loads(sio.getvalue())) + + def test_namedtuple_dump_false(self): + for v in [Value(1), Point(1, 2)]: + l = list(v) + sio = StringIO() + json.dump(v, sio, namedtuple_as_object=False) + self.assertEqual( + l, + json.loads(sio.getvalue())) + self.assertRaises(TypeError, json.dump, v, StringIO(), + tuple_as_array=False, namedtuple_as_object=False) + + def test_asdict_not_callable_dump(self): + for f in CONSTRUCTORS: + self.assertRaises(TypeError, + json.dump, f(DeadDuck()), StringIO(), namedtuple_as_object=True) + sio = StringIO() + json.dump(f(DeadDict()), sio, namedtuple_as_object=True) + self.assertEqual( + json.dumps(f({})), + sio.getvalue()) + + def test_asdict_not_callable_dumps(self): + for f in CONSTRUCTORS: + self.assertRaises(TypeError, + json.dumps, f(DeadDuck()), namedtuple_as_object=True) + self.assertEqual( + json.dumps(f({})), + json.dumps(f(DeadDict()), namedtuple_as_object=True)) diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py new file mode 100644 index 0000000..f0b5b10 --- /dev/null +++ b/simplejson/tests/test_pass1.py @@ -0,0 +1,71 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass1.json +JSON = r''' +[ + "JSON Test Pattern pass1", + {"object with 1 member":["array with 1 element"]}, + {}, + [], + -42, + true, + false, + null, + { + "integer": 1234567890, + "real": -9876.543210, + "e": 0.123456789e-12, + "E": 1.234567890E+34, + "": 23456789012E66, + "zero": 0, + "one": 1, + "space": " ", + "quote": "\"", + "backslash": "\\", + "controls": "\b\f\n\r\t", + "slash": "/ & \/", + "alpha": "abcdefghijklmnopqrstuvwyz", + "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", + "digit": "0123456789", + "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", + "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", + "true": true, + "false": false, + "null": 
null, + "array":[ ], + "object":{ }, + "address": "50 St. James Street", + "url": "http://www.JSON.org/", + "comment": "// /* */": " ", + " s p a c e d " :[1,2 , 3 + +, + +4 , 5 , 6 ,7 ],"compact": [1,2,3,4,5,6,7], + "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", + "quotes": "" \u0022 %22 0x22 034 "", + "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" +: "A key can be any string" + }, + 0.5 ,98.6 +, +99.44 +, + +1066, +1e1, +0.1e1, +1e-1, +1e00,2e+00,2e-00 +,"rosebud"] +''' + +class TestPass1(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py new file mode 100644 index 0000000..5d812b3 --- /dev/null +++ b/simplejson/tests/test_pass2.py @@ -0,0 +1,14 @@ +from unittest import TestCase +import simplejson as json + +# from http://json.org/JSON_checker/test/pass2.json +JSON = r''' +[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] +''' + +class TestPass2(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py new file mode 100644 index 0000000..821d60b --- /dev/null +++ b/simplejson/tests/test_pass3.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass3.json +JSON = r''' +{ + "JSON Test Pattern pass3": { + "The outermost value": "must be an object or array.", + "In this test": "It is an object." 
+ } +} +''' + +class TestPass3(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_raw_json.py b/simplejson/tests/test_raw_json.py new file mode 100644 index 0000000..1dfcc2c --- /dev/null +++ b/simplejson/tests/test_raw_json.py @@ -0,0 +1,47 @@ +import unittest +import simplejson as json + +dct1 = { + 'key1': 'value1' +} + +dct2 = { + 'key2': 'value2', + 'd1': dct1 +} + +dct3 = { + 'key2': 'value2', + 'd1': json.dumps(dct1) +} + +dct4 = { + 'key2': 'value2', + 'd1': json.RawJSON(json.dumps(dct1)) +} + + +class TestRawJson(unittest.TestCase): + + def test_normal_str(self): + self.assertNotEqual(json.dumps(dct2), json.dumps(dct3)) + + def test_raw_json_str(self): + self.assertEqual(json.dumps(dct2), json.dumps(dct4)) + self.assertEqual(dct2, json.loads(json.dumps(dct4))) + + def test_list(self): + self.assertEqual( + json.dumps([dct2]), + json.dumps([json.RawJSON(json.dumps(dct2))])) + self.assertEqual( + [dct2], + json.loads(json.dumps([json.RawJSON(json.dumps(dct2))]))) + + def test_direct(self): + self.assertEqual( + json.dumps(dct2), + json.dumps(json.RawJSON(json.dumps(dct2)))) + self.assertEqual( + dct2, + json.loads(json.dumps(json.RawJSON(json.dumps(dct2))))) diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py new file mode 100644 index 0000000..662eb66 --- /dev/null +++ b/simplejson/tests/test_recursion.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + +class JSONTestObject: + pass + + +class RecursiveJSONEncoder(json.JSONEncoder): + recurse = False + def default(self, o): + if o is JSONTestObject: + if self.recurse: + return [JSONTestObject] + else: + return 'JSONTestObject' + return json.JSONEncoder.default(o) + + +class TestRecursion(TestCase): + def test_listrecursion(self): + x = [] + x.append(x) + try: + json.dumps(x) + except ValueError: + 
pass + else: + self.fail("didn't raise ValueError on list recursion") + x = [] + y = [x] + x.append(y) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on alternating list recursion") + y = [] + x = [y, y] + # ensure that the marker is cleared + json.dumps(x) + + def test_dictrecursion(self): + x = {} + x["test"] = x + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on dict recursion") + x = {} + y = {"a": x, "b": x} + # ensure that the marker is cleared + json.dumps(y) + + def test_defaultrecursion(self): + enc = RecursiveJSONEncoder() + self.assertEqual(enc.encode(JSONTestObject), '"JSONTestObject"') + enc.recurse = True + try: + enc.encode(JSONTestObject) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on default recursion") diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py new file mode 100644 index 0000000..d5de180 --- /dev/null +++ b/simplejson/tests/test_scanstring.py @@ -0,0 +1,196 @@ +import sys +from unittest import TestCase + +import simplejson as json +import simplejson.decoder +from simplejson.compat import b, PY3 + +class TestScanString(TestCase): + # The bytes type is intentionally not used in most of these tests + # under Python 3 because the decoder immediately coerces to str before + # calling scanstring. In Python 2 we are testing the code paths + # for both unicode and str. + # + # The reason this is done is because Python 3 would require + # entirely different code paths for parsing bytes and str. 
+ # + def test_py_scanstring(self): + self._test_scanstring(simplejson.decoder.py_scanstring) + + def test_c_scanstring(self): + if not simplejson.decoder.c_scanstring: + return + self._test_scanstring(simplejson.decoder.c_scanstring) + + self.assertTrue(isinstance(simplejson.decoder.c_scanstring('""', 0)[0], str)) + + def _test_scanstring(self, scanstring): + if sys.maxunicode == 65535: + self.assertEqual( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEqual( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEqual( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEqual( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEqual( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEqual( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEqual( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEqual( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEqual( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEqual( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra comma', 14)) + + self.assertEqual( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEqual( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEqual( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEqual( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + 
self.assertEqual( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEqual( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEqual( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEqual( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEqual( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEqual( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEqual( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + for c in map(chr, range(0x00, 0x1f)): + self.assertEqual( + scanstring(c + '"', 0, None, False), + (c, 2)) + self.assertRaises( + ValueError, + scanstring, c + '"', 0, None, True) + + self.assertRaises(ValueError, scanstring, '', 0, None, True) + self.assertRaises(ValueError, scanstring, 'a', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u0', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u01', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True) + if sys.maxunicode > 65535: + self.assertRaises(ValueError, + scanstring, '\\ud834\\u"', 0, None, True) + self.assertRaises(ValueError, + scanstring, '\\ud834\\x0123"', 0, None, True) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, b("xx\xff")) + + def test_overflow(self): + # Python 2.5 does not have maxsize, Python 3 does not have maxint + maxsize 
= getattr(sys, 'maxsize', getattr(sys, 'maxint', None)) + assert maxsize is not None + self.assertRaises(OverflowError, json.decoder.scanstring, "xxx", + maxsize + 1) + + def test_surrogates(self): + scanstring = json.decoder.scanstring + + def assertScan(given, expect, test_utf8=True): + givens = [given] + if not PY3 and test_utf8: + givens.append(given.encode('utf8')) + for given in givens: + (res, count) = scanstring(given, 1, None, True) + self.assertEqual(len(given), count) + self.assertEqual(res, expect) + + assertScan( + u'"z\\ud834\\u0079x"', + u'z\ud834yx') + assertScan( + u'"z\\ud834\\udd20x"', + u'z\U0001d120x') + assertScan( + u'"z\\ud834\\ud834\\udd20x"', + u'z\ud834\U0001d120x') + assertScan( + u'"z\\ud834x"', + u'z\ud834x') + assertScan( + u'"z\\udd20x"', + u'z\udd20x') + assertScan( + u'"z\ud834x"', + u'z\ud834x') + # It may look strange to join strings together, but Python is drunk. + # https://gist.github.com/etrepum/5538443 + assertScan( + u'"z\\ud834\udd20x12345"', + u''.join([u'z\ud834', u'\udd20x12345'])) + assertScan( + u'"z\ud834\\udd20x"', + u''.join([u'z\ud834', u'\udd20x'])) + # these have different behavior given UTF8 input, because the surrogate + # pair may be joined (in maxunicode > 65535 builds) + assertScan( + u''.join([u'"z\ud834', u'\udd20x"']), + u''.join([u'z\ud834', u'\udd20x']), + test_utf8=False) + + self.assertRaises(ValueError, + scanstring, u'"z\\ud83x"', 1, None, True) + self.assertRaises(ValueError, + scanstring, u'"z\\ud834\\udd2x"', 1, None, True) diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 0000000..91b4d4f --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] 
+ + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=' ', sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEqual(h1, h) + self.assertEqual(h2, h) + self.assertEqual(d2, expect) diff --git a/simplejson/tests/test_speedups.py b/simplejson/tests/test_speedups.py new file mode 100644 index 0000000..8b146df --- /dev/null +++ b/simplejson/tests/test_speedups.py @@ -0,0 +1,114 @@ +from __future__ import with_statement + +import sys +import unittest +from unittest import TestCase + +import simplejson +from simplejson import encoder, decoder, scanner +from simplejson.compat import PY3, long_type, b + + +def has_speedups(): + return encoder.c_make_encoder is not None + + +def skip_if_speedups_missing(func): + def wrapper(*args, **kwargs): + if not has_speedups(): + if hasattr(unittest, 'SkipTest'): + raise unittest.SkipTest("C Extension not available") + else: + sys.stdout.write("C Extension not available") + return + return func(*args, **kwargs) + + return wrapper + + +class BadBool: + def __bool__(self): + 1/0 + __nonzero__ = __bool__ + + +class TestDecode(TestCase): + @skip_if_speedups_missing + def test_make_scanner(self): + self.assertRaises(AttributeError, scanner.c_make_scanner, 1) + + @skip_if_speedups_missing + def test_bad_bool_args(self): + def test(value): + decoder.JSONDecoder(strict=BadBool()).decode(value) + self.assertRaises(ZeroDivisionError, test, '""') + self.assertRaises(ZeroDivisionError, test, '{}') + if not PY3: + self.assertRaises(ZeroDivisionError, test, u'""') + self.assertRaises(ZeroDivisionError, test, u'{}') + +class TestEncode(TestCase): + @skip_if_speedups_missing + def test_make_encoder(self): + self.assertRaises( + TypeError, + encoder.c_make_encoder, + None, + 
("\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7" + "\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75"), + None + ) + + @skip_if_speedups_missing + def test_bad_str_encoder(self): + # Issue #31505: There shouldn't be an assertion failure in case + # c_make_encoder() receives a bad encoder() argument. + import decimal + def bad_encoder1(*args): + return None + enc = encoder.c_make_encoder( + None, lambda obj: str(obj), + bad_encoder1, None, ': ', ', ', + False, False, False, {}, False, False, False, + None, None, 'utf-8', False, False, decimal.Decimal, False) + self.assertRaises(TypeError, enc, 'spam', 4) + self.assertRaises(TypeError, enc, {'spam': 42}, 4) + + def bad_encoder2(*args): + 1/0 + enc = encoder.c_make_encoder( + None, lambda obj: str(obj), + bad_encoder2, None, ': ', ', ', + False, False, False, {}, False, False, False, + None, None, 'utf-8', False, False, decimal.Decimal, False) + self.assertRaises(ZeroDivisionError, enc, 'spam', 4) + + @skip_if_speedups_missing + def test_bad_bool_args(self): + def test(name): + encoder.JSONEncoder(**{name: BadBool()}).encode({}) + self.assertRaises(ZeroDivisionError, test, 'skipkeys') + self.assertRaises(ZeroDivisionError, test, 'ensure_ascii') + self.assertRaises(ZeroDivisionError, test, 'check_circular') + self.assertRaises(ZeroDivisionError, test, 'allow_nan') + self.assertRaises(ZeroDivisionError, test, 'sort_keys') + self.assertRaises(ZeroDivisionError, test, 'use_decimal') + self.assertRaises(ZeroDivisionError, test, 'namedtuple_as_object') + self.assertRaises(ZeroDivisionError, test, 'tuple_as_array') + self.assertRaises(ZeroDivisionError, test, 'bigint_as_string') + self.assertRaises(ZeroDivisionError, test, 'for_json') + self.assertRaises(ZeroDivisionError, test, 'ignore_nan') + self.assertRaises(ZeroDivisionError, test, 'iterable_as_array') + + @skip_if_speedups_missing + def test_int_as_string_bitcount_overflow(self): + long_count = long_type(2)**32+31 + def test(): + 
encoder.JSONEncoder(int_as_string_bitcount=long_count).encode(0) + self.assertRaises((TypeError, OverflowError), test) + + if PY3: + @skip_if_speedups_missing + def test_bad_encoding(self): + with self.assertRaises(UnicodeEncodeError): + encoder.JSONEncoder(encoding='\udcff').encode({b('key'): 123}) diff --git a/simplejson/tests/test_str_subclass.py b/simplejson/tests/test_str_subclass.py new file mode 100644 index 0000000..b6c8351 --- /dev/null +++ b/simplejson/tests/test_str_subclass.py @@ -0,0 +1,21 @@ +from unittest import TestCase + +import simplejson +from simplejson.compat import text_type + +# Tests for issue demonstrated in https://github.com/simplejson/simplejson/issues/144 +class WonkyTextSubclass(text_type): + def __getslice__(self, start, end): + return self.__class__('not what you wanted!') + +class TestStrSubclass(TestCase): + def test_dump_load(self): + for s in ['', '"hello"', 'text', u'\u005c']: + self.assertEqual( + s, + simplejson.loads(simplejson.dumps(WonkyTextSubclass(s)))) + + self.assertEqual( + s, + simplejson.loads(simplejson.dumps(WonkyTextSubclass(s), + ensure_ascii=False))) diff --git a/simplejson/tests/test_subclass.py b/simplejson/tests/test_subclass.py new file mode 100644 index 0000000..2bae3b6 --- /dev/null +++ b/simplejson/tests/test_subclass.py @@ -0,0 +1,37 @@ +from unittest import TestCase +import simplejson as json + +from decimal import Decimal + +class AlternateInt(int): + def __repr__(self): + return 'invalid json' + __str__ = __repr__ + + +class AlternateFloat(float): + def __repr__(self): + return 'invalid json' + __str__ = __repr__ + + +# class AlternateDecimal(Decimal): +# def __repr__(self): +# return 'invalid json' + + +class TestSubclass(TestCase): + def test_int(self): + self.assertEqual(json.dumps(AlternateInt(1)), '1') + self.assertEqual(json.dumps(AlternateInt(-1)), '-1') + self.assertEqual(json.loads(json.dumps({AlternateInt(1): 1})), {'1': 1}) + + def test_float(self): + 
self.assertEqual(json.dumps(AlternateFloat(1.0)), '1.0') + self.assertEqual(json.dumps(AlternateFloat(-1.0)), '-1.0') + self.assertEqual(json.loads(json.dumps({AlternateFloat(1.0): 1})), {'1.0': 1}) + + # NOTE: Decimal subclasses are not supported as-is + # def test_decimal(self): + # self.assertEqual(json.dumps(AlternateDecimal('1.0')), '1.0') + # self.assertEqual(json.dumps(AlternateDecimal('-1.0')), '-1.0') diff --git a/simplejson/tests/test_tool.py b/simplejson/tests/test_tool.py new file mode 100644 index 0000000..914bff8 --- /dev/null +++ b/simplejson/tests/test_tool.py @@ -0,0 +1,114 @@ +from __future__ import with_statement +import os +import sys +import textwrap +import unittest +import subprocess +import tempfile +try: + # Python 3.x + from test.support import strip_python_stderr +except ImportError: + # Python 2.6+ + try: + from test.test_support import strip_python_stderr + except ImportError: + # Python 2.5 + import re + def strip_python_stderr(stderr): + return re.sub( + r"\[\d+ refs\]\r?\n?$".encode(), + "".encode(), + stderr).strip() + +def open_temp_file(): + if sys.version_info >= (2, 6): + file = tempfile.NamedTemporaryFile(delete=False) + filename = file.name + else: + fd, filename = tempfile.mkstemp() + file = os.fdopen(fd, 'w+b') + return file, filename + +class TestTool(unittest.TestCase): + data = """ + + [["blorpie"],[ "whoops" ] , [ + ],\t"d-shtaeou",\r"d-nthiouh", + "i-vhbjkhnth", {"nifty":87}, {"morefield" :\tfalse,"field" + :"yes"} ] + """ + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ] + """) + + def runTool(self, args=None, data=None): + argv = [sys.executable, '-m', 'simplejson.tool'] + if args: + argv.extend(args) + proc = subprocess.Popen(argv, + stdin=subprocess.PIPE, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE) + out, err = proc.communicate(data) + 
self.assertEqual(strip_python_stderr(err), ''.encode()) + self.assertEqual(proc.returncode, 0) + return out.decode('utf8').splitlines() + + def test_stdin_stdout(self): + self.assertEqual( + self.runTool(data=self.data.encode()), + self.expect.splitlines()) + + def test_infile_stdout(self): + infile, infile_name = open_temp_file() + try: + infile.write(self.data.encode()) + infile.close() + self.assertEqual( + self.runTool(args=[infile_name]), + self.expect.splitlines()) + finally: + os.unlink(infile_name) + + def test_infile_outfile(self): + infile, infile_name = open_temp_file() + try: + infile.write(self.data.encode()) + infile.close() + # outfile will get overwritten by tool, so the delete + # may not work on some platforms. Do it manually. + outfile, outfile_name = open_temp_file() + try: + outfile.close() + self.assertEqual( + self.runTool(args=[infile_name, outfile_name]), + []) + with open(outfile_name, 'rb') as f: + self.assertEqual( + f.read().decode('utf8').splitlines(), + self.expect.splitlines() + ) + finally: + os.unlink(outfile_name) + finally: + os.unlink(infile_name) diff --git a/simplejson/tests/test_tuple.py b/simplejson/tests/test_tuple.py new file mode 100644 index 0000000..4ad7b0e --- /dev/null +++ b/simplejson/tests/test_tuple.py @@ -0,0 +1,47 @@ +import unittest + +from simplejson.compat import StringIO +import simplejson as json + +class TestTuples(unittest.TestCase): + def test_tuple_array_dumps(self): + t = (1, 2, 3) + expect = json.dumps(list(t)) + # Default is True + self.assertEqual(expect, json.dumps(t)) + self.assertEqual(expect, json.dumps(t, tuple_as_array=True)) + self.assertRaises(TypeError, json.dumps, t, tuple_as_array=False) + # Ensure that the "default" does not get called + self.assertEqual(expect, json.dumps(t, default=repr)) + self.assertEqual(expect, json.dumps(t, tuple_as_array=True, + default=repr)) + # Ensure that the "default" gets called + self.assertEqual( + json.dumps(repr(t)), + json.dumps(t, tuple_as_array=False, 
default=repr)) + + def test_tuple_array_dump(self): + t = (1, 2, 3) + expect = json.dumps(list(t)) + # Default is True + sio = StringIO() + json.dump(t, sio) + self.assertEqual(expect, sio.getvalue()) + sio = StringIO() + json.dump(t, sio, tuple_as_array=True) + self.assertEqual(expect, sio.getvalue()) + self.assertRaises(TypeError, json.dump, t, StringIO(), + tuple_as_array=False) + # Ensure that the "default" does not get called + sio = StringIO() + json.dump(t, sio, default=repr) + self.assertEqual(expect, sio.getvalue()) + sio = StringIO() + json.dump(t, sio, tuple_as_array=True, default=repr) + self.assertEqual(expect, sio.getvalue()) + # Ensure that the "default" gets called + sio = StringIO() + json.dump(t, sio, tuple_as_array=False, default=repr) + self.assertEqual( + json.dumps(repr(t)), + sio.getvalue()) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 0000000..0c7b1a6 --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,154 @@ +import sys +import codecs +from unittest import TestCase + +import simplejson as json +from simplejson.compat import unichr, text_type, b, BytesIO + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEqual(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEqual(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEqual(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEqual(j, '["\\u03b1\\u03a9"]') + 
+ def test_encoding5(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u, ensure_ascii=False) + self.assertEqual(j, u'"' + u + u'"') + + def test_encoding6(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u], ensure_ascii=False) + self.assertEqual(j, u'["' + u + u'"]') + + def test_big_unicode_encode(self): + u = u'\U0001d120' + self.assertEqual(json.dumps(u), '"\\ud834\\udd20"') + self.assertEqual(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') + + def test_big_unicode_decode(self): + u = u'z\U0001d120x' + self.assertEqual(json.loads('"' + u + '"'), u) + self.assertEqual(json.loads('"z\\ud834\\udd20x"'), u) + + def test_unicode_decode(self): + for i in range(0, 0xd7ff): + u = unichr(i) + #s = '"\\u{0:04x}"'.format(i) + s = '"\\u%04x"' % (i,) + self.assertEqual(json.loads(s), u) + + def test_object_pairs_hook_with_unicode(self): + s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' + p = [(u"xkd", 1), (u"kcw", 2), (u"art", 3), (u"hxm", 4), + (u"qrt", 5), (u"pad", 6), (u"hoy", 7)] + self.assertEqual(json.loads(s), eval(s)) + self.assertEqual(json.loads(s, object_pairs_hook=lambda x: x), p) + od = json.loads(s, object_pairs_hook=json.OrderedDict) + self.assertEqual(od, json.OrderedDict(p)) + self.assertEqual(type(od), json.OrderedDict) + # the object_pairs_hook takes priority over the object_hook + self.assertEqual(json.loads(s, + object_pairs_hook=json.OrderedDict, + object_hook=lambda x: None), + json.OrderedDict(p)) + + + def test_default_encoding(self): + self.assertEqual(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), + {'a': u'\xe9'}) + + def test_unicode_preservation(self): + self.assertEqual(type(json.loads(u'""')), text_type) + self.assertEqual(type(json.loads(u'"a"')), text_type) + self.assertEqual(type(json.loads(u'["a"]')[0]), text_type) + + def test_ensure_ascii_false_returns_unicode(self): + # http://code.google.com/p/simplejson/issues/detail?id=48 
+ self.assertEqual(type(json.dumps([], ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps(0, ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps({}, ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps("", ensure_ascii=False)), text_type) + + def test_ensure_ascii_false_bytestring_encoding(self): + # http://code.google.com/p/simplejson/issues/detail?id=48 + doc1 = {u'quux': b('Arr\xc3\xaat sur images')} + doc2 = {u'quux': u'Arr\xeat sur images'} + doc_ascii = '{"quux": "Arr\\u00eat sur images"}' + doc_unicode = u'{"quux": "Arr\xeat sur images"}' + self.assertEqual(json.dumps(doc1), doc_ascii) + self.assertEqual(json.dumps(doc2), doc_ascii) + self.assertEqual(json.dumps(doc1, ensure_ascii=False), doc_unicode) + self.assertEqual(json.dumps(doc2, ensure_ascii=False), doc_unicode) + + def test_ensure_ascii_linebreak_encoding(self): + # http://timelessrepo.com/json-isnt-a-javascript-subset + s1 = u'\u2029\u2028' + s2 = s1.encode('utf8') + expect = '"\\u2029\\u2028"' + expect_non_ascii = u'"\u2029\u2028"' + self.assertEqual(json.dumps(s1), expect) + self.assertEqual(json.dumps(s2), expect) + self.assertEqual(json.dumps(s1, ensure_ascii=False), expect_non_ascii) + self.assertEqual(json.dumps(s2, ensure_ascii=False), expect_non_ascii) + + def test_invalid_escape_sequences(self): + # incomplete escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1234') + # invalid escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12x4"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"') + if 
sys.maxunicode > 65535: + # invalid escape sequence for low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"') + + def test_ensure_ascii_still_works(self): + # in the ascii range, ensure that everything is the same + for c in map(unichr, range(0, 127)): + self.assertEqual( + json.dumps(c, ensure_ascii=False), + json.dumps(c)) + snowman = u'\N{SNOWMAN}' + self.assertEqual( + json.dumps(c, ensure_ascii=False), + '"' + c + '"') + + def test_strip_bom(self): + content = u"\u3053\u3093\u306b\u3061\u308f" + json_doc = codecs.BOM_UTF8 + b(json.dumps(content)) + self.assertEqual(json.load(BytesIO(json_doc)), content) + for doc in json_doc, json_doc.decode('utf8'): + self.assertEqual(json.loads(doc), content) diff --git a/simplejson/tool.py b/simplejson/tool.py new file mode 100644 index 0000000..062e8e2 --- /dev/null +++ b/simplejson/tool.py @@ -0,0 +1,42 @@ +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) + +""" +from __future__ import with_statement +import sys +import simplejson as json + +def main(): + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'r') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'r') + outfile = open(sys.argv[2], 
'w') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + with infile: + try: + obj = json.load(infile, + object_pairs_hook=json.OrderedDict, + use_decimal=True) + except ValueError: + raise SystemExit(sys.exc_info()[1]) + with outfile: + json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True) + outfile.write('\n') + + +if __name__ == '__main__': + main() diff --git a/speedtest-cli.lha b/speedtest-cli.lha new file mode 100644 index 0000000..04975c4 Binary files /dev/null and b/speedtest-cli.lha differ