third_party/simplejson/encoder.py

   1 """
   2 Implementation of JSONEncoder
   3 """
   4 import re
   5 try:
   6     from simplejson import _speedups
   7 except ImportError:
   8     _speedups = None
   9
  10 ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
  11 ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
  12 ESCAPE_DCT = {
  13     # escape all forward slashes to prevent </script> attack
  14     '/': '\\/',
  15     '\\': '\\\\',
  16     '"': '\\"',
  17     '\b': '\\b',
  18     '\f': '\\f',
  19     '\n': '\\n',
  20     '\r': '\\r',
  21     '\t': '\\t',
  22 }
  23 for i in range(0x20):
  24     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
  25
  26 # assume this produces an infinity on all machines (probably not guaranteed)
  27 INFINITY = float('1e66666')
  28
  29 def floatstr(o, allow_nan=True):
  30     # Check for specials.  Note that this type of test is processor- and/or
  31     # platform-specific, so do tests which don't depend on the internals.
  32
  33     if o != o:
  34         text = 'NaN'
  35     elif o == INFINITY:
  36         text = 'Infinity'
  37     elif o == -INFINITY:
  38         text = '-Infinity'
  39     else:
  40         return repr(o)
  41
  42     if not allow_nan:
  43         raise ValueError("Out of range float values are not JSON compliant: %r"
  44             % (o,))
  45
  46     return text
  47
  48
  49 def encode_basestring(s):
  50     """
  51     Return a JSON representation of a Python string
  52     """
  53     def replace(match):
  54         return ESCAPE_DCT[match.group(0)]
  55     return '"' + ESCAPE.sub(replace, s) + '"'
  56
  57 def encode_basestring_ascii(s):
  58     def replace(match):
  59         s = match.group(0)
  60         try:
  61             return ESCAPE_DCT[s]
  62         except KeyError:
  63             n = ord(s)
  64             if n < 0x10000:
  65                 return '\\u%04x' % (n,)
  66             else:
  67                 # surrogate pair
  68                 n -= 0x10000
  69                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
  70                 s2 = 0xdc00 | (n & 0x3ff)
  71                 return '\\u%04x\\u%04x' % (s1, s2)
  72     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
  73
  74 try:
  75     encode_basestring_ascii = _speedups.encode_basestring_ascii
  76     _need_utf8 = True
  77 except AttributeError:
  78     _need_utf8 = False
  79
  80 class JSONEncoder(object):
  81     """
  82     Extensible JSON <http://json.org> encoder for Python data structures.
  83
  84     Supports the following objects and types by default:
  85
  86     +-------------------+---------------+
  87     | Python            | JSON          |
  88     +===================+===============+
  89     | dict              | object        |
  90     +-------------------+---------------+
  91     | list, tuple       | array         |
  92     +-------------------+---------------+
  93     | str, unicode      | string        |
  94     +-------------------+---------------+
  95     | int, long, float  | number        |
  96     +-------------------+---------------+
  97     | True              | true          |
  98     +-------------------+---------------+
  99     | False             | false         |
 100     +-------------------+---------------+
 101     | None              | null          |
 102     +-------------------+---------------+
 103
 104     To extend this to recognize other objects, subclass and implement a
 105     ``.default()`` method with another method that returns a serializable
 106     object for ``o`` if possible, otherwise it should call the superclass
 107     implementation (to raise ``TypeError``).
 108     """
 109     __all__ = ['__init__', 'default', 'encode', 'iterencode']
 110     item_separator = ', '
 111     key_separator = ': '
 112     def __init__(self, skipkeys=False, ensure_ascii=True,
 113             check_circular=True, allow_nan=True, sort_keys=False,
 114             indent=None, separators=None, encoding='utf-8'):
 115         """
 116         Constructor for JSONEncoder, with sensible defaults.
 117
 118         If skipkeys is False, then it is a TypeError to attempt
 119         encoding of keys that are not str, int, long, float or None.  If
 120         skipkeys is True, such items are simply skipped.
 121
 122         If ensure_ascii is True, the output is guaranteed to be str
 123         objects with all incoming unicode characters escaped.  If
 124         ensure_ascii is false, the output will be unicode object.
 125
 126         If check_circular is True, then lists, dicts, and custom encoded
 127         objects will be checked for circular references during encoding to
 128         prevent an infinite recursion (which would cause an OverflowError).
 129         Otherwise, no such check takes place.
 130
 131         If allow_nan is True, then NaN, Infinity, and -Infinity will be
 132         encoded as such.  This behavior is not JSON specification compliant,
 133         but is consistent with most JavaScript based encoders and decoders.
 134         Otherwise, it will be a ValueError to encode such floats.
 135
 136         If sort_keys is True, then the output of dictionaries will be
 137         sorted by key; this is useful for regression tests to ensure
 138         that JSON serializations can be compared on a day-to-day basis.
 139
 140         If indent is a non-negative integer, then JSON array
 141         elements and object members will be pretty-printed with that
 142         indent level.  An indent level of 0 will only insert newlines.
 143         None is the most compact representation.
 144
 145         If specified, separators should be a (item_separator, key_separator)
 146         tuple. The default is (', ', ': '). To get the most compact JSON
 147         representation you should specify (',', ':') to eliminate whitespace.
 148
 149         If encoding is not None, then all input strings will be
 150         transformed into unicode using that encoding prior to JSON-encoding.
 151         The default is UTF-8.
 152         """
 153
 154         self.skipkeys = skipkeys
 155         self.ensure_ascii = ensure_ascii
 156         self.check_circular = check_circular
 157         self.allow_nan = allow_nan
 158         self.sort_keys = sort_keys
 159         self.indent = indent
 160         self.current_indent_level = 0
 161         if separators is not None:
 162             self.item_separator, self.key_separator = separators
 163         self.encoding = encoding
 164
 165     def _newline_indent(self):
 166         return '\n' + (' ' * (self.indent * self.current_indent_level))
 167
 168     def _iterencode_list(self, lst, markers=None):
 169         if not lst:
 170             yield '[]'
 171             return
 172         if markers is not None:
 173             markerid = id(lst)
 174             if markerid in markers:
 175                 raise ValueError("Circular reference detected")
 176             markers[markerid] = lst
 177         yield '['
 178         if self.indent is not None:
 179             self.current_indent_level += 1
 180             newline_indent = self._newline_indent()
 181             separator = self.item_separator + newline_indent
 182             yield newline_indent
 183         else:
 184             newline_indent = None
 185             separator = self.item_separator
 186         first = True
 187         for value in lst:
 188             if first:
 189                 first = False
 190             else:
 191                 yield separator
 192             for chunk in self._iterencode(value, markers):
 193                 yield chunk
 194         if newline_indent is not None:
 195             self.current_indent_level -= 1
 196             yield self._newline_indent()
 197         yield ']'
 198         if markers is not None:
 199             del markers[markerid]
 200
 201     def _iterencode_dict(self, dct, markers=None):
 202         if not dct:
 203             yield '{}'
 204             return
 205         if markers is not None:
 206             markerid = id(dct)
 207             if markerid in markers:
 208                 raise ValueError("Circular reference detected")
 209             markers[markerid] = dct
 210         yield '{'
 211         key_separator = self.key_separator
 212         if self.indent is not None:
 213             self.current_indent_level += 1
 214             newline_indent = self._newline_indent()
 215             item_separator = self.item_separator + newline_indent
 216             yield newline_indent
 217         else:
 218             newline_indent = None
 219             item_separator = self.item_separator
 220         first = True
 221         if self.ensure_ascii:
 222             encoder = encode_basestring_ascii
 223         else:
 224             encoder = encode_basestring
 225         allow_nan = self.allow_nan
 226         if self.sort_keys:
 227             keys = dct.keys()
 228             keys.sort()
 229             items = [(k, dct[k]) for k in keys]
 230         else:
 231             items = dct.iteritems()
 232         _encoding = self.encoding
 233         _do_decode = (_encoding is not None
 234             and not (_need_utf8 and _encoding == 'utf-8'))
 235         for key, value in items:
 236             if isinstance(key, str):
 237                 if _do_decode:
 238                     key = key.decode(_encoding)
 239             elif isinstance(key, basestring):
 240                 pass
 241             # JavaScript is weakly typed for these, so it makes sense to
 242             # also allow them.  Many encoders seem to do something like this.
 243             elif isinstance(key, float):
 244                 key = floatstr(key, allow_nan)
 245             elif isinstance(key, (int, long)):
 246                 key = str(key)
 247             elif key is True:
 248                 key = 'true'
 249             elif key is False:
 250                 key = 'false'
 251             elif key is None:
 252                 key = 'null'
 253             elif self.skipkeys:
 254                 continue
 255             else:
 256                 raise TypeError("key %r is not a string" % (key,))
 257             if first:
 258                 first = False
 259             else:
 260                 yield item_separator
 261             yield encoder(key)
 262             yield key_separator
 263             for chunk in self._iterencode(value, markers):
 264                 yield chunk
 265         if newline_indent is not None:
 266             self.current_indent_level -= 1
 267             yield self._newline_indent()
 268         yield '}'
 269         if markers is not None:
 270             del markers[markerid]
 271
 272     def _iterencode(self, o, markers=None):
 273         if isinstance(o, basestring):
 274             if self.ensure_ascii:
 275                 encoder = encode_basestring_ascii
 276             else:
 277                 encoder = encode_basestring
 278             _encoding = self.encoding
 279             if (_encoding is not None and isinstance(o, str)
 280                     and not (_need_utf8 and _encoding == 'utf-8')):
 281                 o = o.decode(_encoding)
 282             yield encoder(o)
 283         elif o is None:
 284             yield 'null'
 285         elif o is True:
 286             yield 'true'
 287         elif o is False:
 288             yield 'false'
 289         elif isinstance(o, (int, long)):
 290             yield str(o)
 291         elif isinstance(o, float):
 292             yield floatstr(o, self.allow_nan)
 293         elif isinstance(o, (list, tuple)):
 294             for chunk in self._iterencode_list(o, markers):
 295                 yield chunk
 296         elif isinstance(o, dict):
 297             for chunk in self._iterencode_dict(o, markers):
 298                 yield chunk
 299         else:
 300             if markers is not None:
 301                 markerid = id(o)
 302                 if markerid in markers:
 303                     raise ValueError("Circular reference detected")
 304                 markers[markerid] = o
 305             for chunk in self._iterencode_default(o, markers):
 306                 yield chunk
 307             if markers is not None:
 308                 del markers[markerid]
 309
 310     def _iterencode_default(self, o, markers=None):
 311         newobj = self.default(o)
 312         return self._iterencode(newobj, markers)
 313
 314     def default(self, o):
 315         """
 316         Implement this method in a subclass such that it returns
 317         a serializable object for ``o``, or calls the base implementation
 318         (to raise a ``TypeError``).
 319
 320         For example, to support arbitrary iterators, you could
 321         implement default like this::
 322
 323             def default(self, o):
 324                 try:
 325                     iterable = iter(o)
 326                 except TypeError:
 327                     pass
 328                 else:
 329                     return list(iterable)
 330                 return JSONEncoder.default(self, o)
 331         """
 332         raise TypeError("%r is not JSON serializable" % (o,))
 333
 334     def encode(self, o):
 335         """
 336         Return a JSON string representation of a Python data structure.
 337
 338         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
 339         '{"foo":["bar", "baz"]}'
 340         """
 341         # This is for extremely simple cases and benchmarks...
 342         if isinstance(o, basestring):
 343             if isinstance(o, str):
 344                 _encoding = self.encoding
 345                 if (_encoding is not None
 346                         and not (_encoding == 'utf-8' and _need_utf8)):
 347                     o = o.decode(_encoding)
 348             return encode_basestring_ascii(o)
 349         # This doesn't pass the iterator directly to ''.join() because it
 350         # sucks at reporting exceptions.  It's going to do this internally
 351         # anyway because it uses PySequence_Fast or similar.
 352         chunks = list(self.iterencode(o))
 353         return ''.join(chunks)
 354
 355     def iterencode(self, o):
 356         """
 357         Encode the given object and yield each string
 358         representation as available.
 359
 360         For example::
 361
 362             for chunk in JSONEncoder().iterencode(bigobject):
 363                 mysocket.write(chunk)
 364         """
 365         if self.check_circular:
 366             markers = {}
 367         else:
 368             markers = None
 369         return self._iterencode(o, markers)
 370
# Names exported by a star-import of this module: only the encoder class.
__all__ = ['JSONEncoder']