third_party/simplejson/encoder.py

   1 """Implementation of JSONEncoder
   2 """
   3 import re
   4 from decimal import Decimal
   5
   6 def _import_speedups():
   7     try:
   8         from simplejson import _speedups
   9         return _speedups.encode_basestring_ascii, _speedups.make_encoder
  10     except ImportError:
  11         return None, None
# Resolve the optional C accelerators once, at import time.  Both names are
# None when ``simplejson._speedups`` could not be imported.
c_encode_basestring_ascii, c_make_encoder = _import_speedups()
  13
  14 from simplejson.decoder import PosInf
  15
# Characters that are replaced inside a JSON string when non-ASCII output is
# allowed: the control range 0x00-0x1f, backslash, double quote, and the
# U+2028 / U+2029 separator characters.
ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# Characters that are replaced for ASCII-only output: backslash, double
# quote, and anything outside the range space (0x20) through tilde (0x7e).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte in the 0x80-0xff range; the string encoders use it to
# decide whether a byte string has to be decoded as UTF-8 first.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Replacement text for each escaped character.  The short escapes are listed
# explicitly; the remaining control characters are filled in by the loop
# below.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    u'\u2028': '\\u2028',
    u'\u2029': '\\u2029',
}
# Add a \u00XX escape for every control character below 0x20.  setdefault
# leaves the short escapes defined above untouched.
for i in range(0x20):
    # Equivalent str.format spelling, left disabled:
    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Callable used to render ordinary (finite) floats; JSONEncoder.iterencode
# passes it to its float formatter as the default ``_repr``.
FLOAT_REPR = repr
  35
  36 def encode_basestring(s):
  37     """Return a JSON representation of a Python string
  38
  39     """
  40     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
  41         s = s.decode('utf-8')
  42     def replace(match):
  43         return ESCAPE_DCT[match.group(0)]
  44     return u'"' + ESCAPE.sub(replace, s) + u'"'
  45
  46
  47 def py_encode_basestring_ascii(s):
  48     """Return an ASCII-only JSON representation of a Python string
  49
  50     """
  51     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
  52         s = s.decode('utf-8')
  53     def replace(match):
  54         s = match.group(0)
  55         try:
  56             return ESCAPE_DCT[s]
  57         except KeyError:
  58             n = ord(s)
  59             if n < 0x10000:
  60                 #return '\\u{0:04x}'.format(n)
  61                 return '\\u%04x' % (n,)
  62             else:
  63                 # surrogate pair
  64                 n -= 0x10000
  65                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
  66                 s2 = 0xdc00 | (n & 0x3ff)
  67                 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
  68                 return '\\u%04x\\u%04x' % (s1, s2)
  69     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
  70
  71
# Use the C implementation when it was imported successfully; otherwise fall
# back to the pure-Python encoder.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
  74
  75 class JSONEncoder(object):
  76     """Extensible JSON <http://json.org> encoder for Python data structures.
  77
  78     Supports the following objects and types by default:
  79
  80     +-------------------+---------------+
  81     | Python            | JSON          |
  82     +===================+===============+
  83     | dict, namedtuple  | object        |
  84     +-------------------+---------------+
  85     | list, tuple       | array         |
  86     +-------------------+---------------+
  87     | str, unicode      | string        |
  88     +-------------------+---------------+
  89     | int, long, float  | number        |
  90     +-------------------+---------------+
  91     | True              | true          |
  92     +-------------------+---------------+
  93     | False             | false         |
  94     +-------------------+---------------+
  95     | None              | null          |
  96     +-------------------+---------------+
  97
  98     To extend this to recognize other objects, subclass and implement a
  99     ``.default()`` method with another method that returns a serializable
 100     object for ``o`` if possible, otherwise it should call the superclass
 101     implementation (to raise ``TypeError``).
 102
 103     """
 104     item_separator = ', '
 105     key_separator = ': '
 106     def __init__(self, skipkeys=False, ensure_ascii=True,
 107             check_circular=True, allow_nan=True, sort_keys=False,
 108             indent=None, separators=None, encoding='utf-8', default=None,
 109             use_decimal=True, namedtuple_as_object=True,
 110             tuple_as_array=True, bigint_as_string=False,
 111             item_sort_key=None):
 112         """Constructor for JSONEncoder, with sensible defaults.
 113
 114         If skipkeys is false, then it is a TypeError to attempt
 115         encoding of keys that are not str, int, long, float or None.  If
 116         skipkeys is True, such items are simply skipped.
 117
 118         If ensure_ascii is true, the output is guaranteed to be str
 119         objects with all incoming unicode characters escaped.  If
 120         ensure_ascii is false, the output will be unicode object.
 121
 122         If check_circular is true, then lists, dicts, and custom encoded
 123         objects will be checked for circular references during encoding to
 124         prevent an infinite recursion (which would cause an OverflowError).
 125         Otherwise, no such check takes place.
 126
 127         If allow_nan is true, then NaN, Infinity, and -Infinity will be
 128         encoded as such.  This behavior is not JSON specification compliant,
 129         but is consistent with most JavaScript based encoders and decoders.
 130         Otherwise, it will be a ValueError to encode such floats.
 131
 132         If sort_keys is true, then the output of dictionaries will be
 133         sorted by key; this is useful for regression tests to ensure
 134         that JSON serializations can be compared on a day-to-day basis.
 135
 136         If indent is a string, then JSON array elements and object members
 137         will be pretty-printed with a newline followed by that string repeated
 138         for each level of nesting. ``None`` (the default) selects the most compact
 139         representation without any newlines. For backwards compatibility with
 140         versions of simplejson earlier than 2.1.0, an integer is also accepted
 141         and is converted to a string with that many spaces.
 142
 143         If specified, separators should be a (item_separator, key_separator)
 144         tuple.  The default is (', ', ': ').  To get the most compact JSON
 145         representation you should specify (',', ':') to eliminate whitespace.
 146
 147         If specified, default is a function that gets called for objects
 148         that can't otherwise be serialized.  It should return a JSON encodable
 149         version of the object or raise a ``TypeError``.
 150
 151         If encoding is not None, then all input strings will be
 152         transformed into unicode using that encoding prior to JSON-encoding.
 153         The default is UTF-8.
 154
 155         If use_decimal is true (not the default), ``decimal.Decimal`` will
 156         be supported directly by the encoder. For the inverse, decode JSON
 157         with ``parse_float=decimal.Decimal``.
 158
 159         If namedtuple_as_object is true (the default), objects with
 160         ``_asdict()`` methods will be encoded as JSON objects.
 161
 162         If tuple_as_array is true (the default), tuple (and subclasses) will
 163         be encoded as JSON arrays.
 164
 165         If bigint_as_string is true (not the default), ints 2**53 and higher
 166         or lower than -2**53 will be encoded as strings. This is to avoid the
 167         rounding that happens in Javascript otherwise.
 168
 169         If specified, item_sort_key is a callable used to sort the items in
 170         each dictionary. This is useful if you want to sort items other than
 171         in alphabetical order by key.
 172         """
 173
 174         self.skipkeys = skipkeys
 175         self.ensure_ascii = ensure_ascii
 176         self.check_circular = check_circular
 177         self.allow_nan = allow_nan
 178         self.sort_keys = sort_keys
 179         self.use_decimal = use_decimal
 180         self.namedtuple_as_object = namedtuple_as_object
 181         self.tuple_as_array = tuple_as_array
 182         self.bigint_as_string = bigint_as_string
 183         self.item_sort_key = item_sort_key
 184         if indent is not None and not isinstance(indent, basestring):
 185             indent = indent * ' '
 186         self.indent = indent
 187         if separators is not None:
 188             self.item_separator, self.key_separator = separators
 189         elif indent is not None:
 190             self.item_separator = ','
 191         if default is not None:
 192             self.default = default
 193         self.encoding = encoding
 194
 195     def default(self, o):
 196         """Implement this method in a subclass such that it returns
 197         a serializable object for ``o``, or calls the base implementation
 198         (to raise a ``TypeError``).
 199
 200         For example, to support arbitrary iterators, you could
 201         implement default like this::
 202
 203             def default(self, o):
 204                 try:
 205                     iterable = iter(o)
 206                 except TypeError:
 207                     pass
 208                 else:
 209                     return list(iterable)
 210                 return JSONEncoder.default(self, o)
 211
 212         """
 213         raise TypeError(repr(o) + " is not JSON serializable")
 214
 215     def encode(self, o):
 216         """Return a JSON string representation of a Python data structure.
 217
 218         >>> from simplejson import JSONEncoder
 219         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
 220         '{"foo": ["bar", "baz"]}'
 221
 222         """
 223         # This is for extremely simple cases and benchmarks.
 224         if isinstance(o, basestring):
 225             if isinstance(o, str):
 226                 _encoding = self.encoding
 227                 if (_encoding is not None
 228                         and not (_encoding == 'utf-8')):
 229                     o = o.decode(_encoding)
 230             if self.ensure_ascii:
 231                 return encode_basestring_ascii(o)
 232             else:
 233                 return encode_basestring(o)
 234         # This doesn't pass the iterator directly to ''.join() because the
 235         # exceptions aren't as detailed.  The list call should be roughly
 236         # equivalent to the PySequence_Fast that ''.join() would do.
 237         chunks = self.iterencode(o, _one_shot=True)
 238         if not isinstance(chunks, (list, tuple)):
 239             chunks = list(chunks)
 240         if self.ensure_ascii:
 241             return ''.join(chunks)
 242         else:
 243             return u''.join(chunks)
 244
 245     def iterencode(self, o, _one_shot=False):
 246         """Encode the given object and yield each string
 247         representation as available.
 248
 249         For example::
 250
 251             for chunk in JSONEncoder().iterencode(bigobject):
 252                 mysocket.write(chunk)
 253
 254         """
 255         if self.check_circular:
 256             markers = {}
 257         else:
 258             markers = None
 259         if self.ensure_ascii:
 260             _encoder = encode_basestring_ascii
 261         else:
 262             _encoder = encode_basestring
 263         if self.encoding != 'utf-8':
 264             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
 265                 if isinstance(o, str):
 266                     o = o.decode(_encoding)
 267                 return _orig_encoder(o)
 268
 269         def floatstr(o, allow_nan=self.allow_nan,
 270                 _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
 271             # Check for specials. Note that this type of test is processor
 272             # and/or platform-specific, so do tests which don't depend on
 273             # the internals.
 274
 275             if o != o:
 276                 text = 'NaN'
 277             elif o == _inf:
 278                 text = 'Infinity'
 279             elif o == _neginf:
 280                 text = '-Infinity'
 281             else:
 282                 return _repr(o)
 283
 284             if not allow_nan:
 285                 raise ValueError(
 286                     "Out of range float values are not JSON compliant: " +
 287                     repr(o))
 288
 289             return text
 290
 291
 292         key_memo = {}
 293         if (_one_shot and c_make_encoder is not None
 294                 and self.indent is None):
 295             _iterencode = c_make_encoder(
 296                 markers, self.default, _encoder, self.indent,
 297                 self.key_separator, self.item_separator, self.sort_keys,
 298                 self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
 299                 self.namedtuple_as_object, self.tuple_as_array,
 300                 self.bigint_as_string, self.item_sort_key,
 301                 Decimal)
 302         else:
 303             _iterencode = _make_iterencode(
 304                 markers, self.default, _encoder, self.indent, floatstr,
 305                 self.key_separator, self.item_separator, self.sort_keys,
 306                 self.skipkeys, _one_shot, self.use_decimal,
 307                 self.namedtuple_as_object, self.tuple_as_array,
 308                 self.bigint_as_string, self.item_sort_key,
 309                 Decimal=Decimal)
 310         try:
 311             return _iterencode(o, 0)
 312         finally:
 313             key_memo.clear()
 314
 315
 316 class JSONEncoderForHTML(JSONEncoder):
 317     """An encoder that produces JSON safe to embed in HTML.
 318
 319     To embed JSON content in, say, a script tag on a web page, the
 320     characters &, < and > should be escaped. They cannot be escaped
 321     with the usual entities (e.g. &amp;) because they are not expanded
 322     within <script> tags.
 323     """
 324
 325     def encode(self, o):
 326         # Override JSONEncoder.encode because it has hacks for
 327         # performance that make things more complicated.
 328         chunks = self.iterencode(o, True)
 329         if self.ensure_ascii:
 330             return ''.join(chunks)
 331         else:
 332             return u''.join(chunks)
 333
 334     def iterencode(self, o, _one_shot=False):
 335         chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
 336         for chunk in chunks:
 337             chunk = chunk.replace('&', '\\u0026')
 338             chunk = chunk.replace('<', '\\u003c')
 339             chunk = chunk.replace('>', '\\u003e')
 340             yield chunk
 341
 342
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal, _namedtuple_as_object, _tuple_as_array,
        _bigint_as_string, _item_sort_key,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build the pure-Python JSON encoding generator.

    Returns a generator function ``_iterencode(o, _current_indent_level)``
    that yields the JSON text for ``o`` in string chunks.

    Parameters:

    - markers: dict used to detect circular references (keyed by ``id()``),
      or None to disable the check.
    - _default: callable invoked for objects no other branch handles; its
      return value is encoded in place of the object.
    - _encoder: callable turning a string into a JSON string literal.
    - _indent: string repeated once per nesting level after each newline,
      or None for output without newlines.
    - _floatstr: callable turning a float into its JSON text.
    - _key_separator, _item_separator: text emitted between a key and its
      value, and between consecutive items.
    - _sort_keys: if true, dict items are sorted by key.
    - _skipkeys: if true, dict keys of unsupported types are skipped
      instead of raising TypeError.
    - _one_shot: accepted but not referenced in this function.
    - _use_decimal: if true, Decimal instances are emitted via ``str()``.
    - _namedtuple_as_object: if true, objects with a callable ``_asdict``
      attribute are encoded as JSON objects from its result.
    - _tuple_as_array: if true, tuples are encoded as JSON arrays.
    - _bigint_as_string: if true, ints outside the open interval
      (-2**53, 2**53) are emitted as quoted strings.
    - _item_sort_key: optional callable used as the sort key for dict
      items; checked before _sort_keys.

    The remaining keyword parameters bind builtins as defaults so that the
    nested generators look them up as locals (see the HACK note above).

    Raises TypeError if _item_sort_key is truthy but not callable.
    """
    if _item_sort_key and not callable(_item_sort_key):
        raise TypeError("item_sort_key must be None or callable")

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON array text for the sequence ``lst``."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            # Remember this container while it is being encoded so that a
            # nested reference to it is reported as circular.
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # buf holds the text that must precede the next element: the opening
        # bracket (plus indentation) for the first one, the separator after.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            # True/False are tested by identity before the int branch.
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                # Quote the integer only when _bigint_as_string is set and
                # the value lies outside (-2**53, 2**53).
                yield ((buf + str(value))
                       if (not _bigint_as_string or
                           (-1 << 53) < value < (1 << 53))
                           else (buf + '"' + str(value) + '"'))
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Container or custom object: emit the pending prefix on its
                # own, then delegate to the matching generator.
                yield buf
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Put the closing bracket on its own line at the outer level.
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON object text for the mapping ``dct``."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            # Remember this container while it is being encoded so that a
            # nested reference to it is reported as circular.
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        # Sorting happens on the original (key, value) pairs, before keys
        # are converted to strings below.
        if _item_sort_key:
            items = dct.items()
            items.sort(key=_item_sort_key)
        elif _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            # The converted key is always emitted as a JSON string literal.
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            # True/False are tested by identity before the int branch.
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                # Quote the integer only when _bigint_as_string is set and
                # the value lies outside (-2**53, 2**53).
                yield (str(value)
                       if (not _bigint_as_string or
                           (-1 << 53) < value < (1 << 53))
                           else ('"' + str(value) + '"'))
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                # Container or custom object: delegate to the matching
                # generator.
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Put the closing brace on its own line at the outer level.
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON text for an arbitrary object ``o``."""
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        # True/False are tested by identity before the int branch.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            # Quote the integer only when _bigint_as_string is set and the
            # value lies outside (-2**53, 2**53).
            yield (str(o)
                   if (not _bigint_as_string or
                       (-1 << 53) < o < (1 << 53))
                       else ('"' + str(o) + '"'))
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, list):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        else:
            _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
            if _asdict and callable(_asdict):
                for chunk in _iterencode_dict(_asdict(), _current_indent_level):
                    yield chunk
            elif (_tuple_as_array and isinstance(o, tuple)):
                for chunk in _iterencode_list(o, _current_indent_level):
                    yield chunk
            elif isinstance(o, dict):
                for chunk in _iterencode_dict(o, _current_indent_level):
                    yield chunk
            elif _use_decimal and isinstance(o, Decimal):
                yield str(o)
            else:
                # Unknown type: ask _default for a replacement and encode
                # that.  The marker guards against _default returning
                # something that leads back to this same object.
                if markers is not None:
                    markerid = id(o)
                    if markerid in markers:
                        raise ValueError("Circular reference detected")
                    markers[markerid] = o
                o = _default(o)
                for chunk in _iterencode(o, _current_indent_level):
                    yield chunk
                if markers is not None:
                    del markers[markerid]

    return _iterencode