external_libraries/simplejson-2.3.2/encoder.py

   1 """Implementation of JSONEncoder
   2 """
   3 import re
   4 from decimal import Decimal
   5
   6 def _import_speedups():
   7     try:
   8         from . import _speedups
   9         return _speedups.encode_basestring_ascii, _speedups.make_encoder
  10     except ImportError:
  11         return None, None
  12 c_encode_basestring_ascii, c_make_encoder = _import_speedups()
  13
  14 from .decoder import PosInf
  15
# Characters escaped when the result may remain unicode: control characters,
# backslash, double quote, and the U+2028 / U+2029 separators.
ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# Characters escaped for ASCII-only output: backslash, double quote, and
# everything outside the printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte in the 0x80-0xff range, i.e. detects a byte string that
# is not pure ASCII.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a single character to the JSON escape sequence that replaces it.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    u'\u2028': '\\u2028',
    u'\u2029': '\\u2029',
}
# Add generic \uXXXX escapes for the remaining control characters;
# setdefault() leaves the short forms defined above (\n, \t, ...) untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Callable used by JSONEncoder.iterencode to render floats that are not
# NaN or +/-Infinity.
FLOAT_REPR = repr
  35
  36 def encode_basestring(s):
  37     """Return a JSON representation of a Python string
  38
  39     """
  40     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
  41         s = s.decode('utf-8')
  42     def replace(match):
  43         return ESCAPE_DCT[match.group(0)]
  44     return u'"' + ESCAPE.sub(replace, s) + u'"'
  45
  46
  47 def py_encode_basestring_ascii(s):
  48     """Return an ASCII-only JSON representation of a Python string
  49
  50     """
  51     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
  52         s = s.decode('utf-8')
  53     def replace(match):
  54         s = match.group(0)
  55         try:
  56             return ESCAPE_DCT[s]
  57         except KeyError:
  58             n = ord(s)
  59             if n < 0x10000:
  60                 #return '\\u{0:04x}'.format(n)
  61                 return '\\u%04x' % (n,)
  62             else:
  63                 # surrogate pair
  64                 n -= 0x10000
  65                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
  66                 s2 = 0xdc00 | (n & 0x3ff)
  67                 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
  68                 return '\\u%04x\\u%04x' % (s1, s2)
  69     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
  70
  71
  72 encode_basestring_ascii = (
  73     c_encode_basestring_ascii or py_encode_basestring_ascii)
  74
  75 class JSONEncoder(object):
  76     """Extensible JSON <http://json.org> encoder for Python data structures.
  77
  78     Supports the following objects and types by default:
  79
  80     +-------------------+---------------+
  81     | Python            | JSON          |
  82     +===================+===============+
  83     | dict, namedtuple  | object        |
  84     +-------------------+---------------+
  85     | list, tuple       | array         |
  86     +-------------------+---------------+
  87     | str, unicode      | string        |
  88     +-------------------+---------------+
  89     | int, long, float  | number        |
  90     +-------------------+---------------+
  91     | True              | true          |
  92     +-------------------+---------------+
  93     | False             | false         |
  94     +-------------------+---------------+
  95     | None              | null          |
  96     +-------------------+---------------+
  97
  98     To extend this to recognize other objects, subclass and implement a
  99     ``.default()`` method with another method that returns a serializable
 100     object for ``o`` if possible, otherwise it should call the superclass
 101     implementation (to raise ``TypeError``).
 102
 103     """
 104     item_separator = ', '
 105     key_separator = ': '
 106     def __init__(self, skipkeys=False, ensure_ascii=True,
 107             check_circular=True, allow_nan=True, sort_keys=False,
 108             indent=None, separators=None, encoding='utf-8', default=None,
 109             use_decimal=True, namedtuple_as_object=True,
 110             tuple_as_array=True):
 111         """Constructor for JSONEncoder, with sensible defaults.
 112
 113         If skipkeys is false, then it is a TypeError to attempt
 114         encoding of keys that are not str, int, long, float or None.  If
 115         skipkeys is True, such items are simply skipped.
 116
 117         If ensure_ascii is true, the output is guaranteed to be str
 118         objects with all incoming unicode characters escaped.  If
 119         ensure_ascii is false, the output will be unicode object.
 120
 121         If check_circular is true, then lists, dicts, and custom encoded
 122         objects will be checked for circular references during encoding to
 123         prevent an infinite recursion (which would cause an OverflowError).
 124         Otherwise, no such check takes place.
 125
 126         If allow_nan is true, then NaN, Infinity, and -Infinity will be
 127         encoded as such.  This behavior is not JSON specification compliant,
 128         but is consistent with most JavaScript based encoders and decoders.
 129         Otherwise, it will be a ValueError to encode such floats.
 130
 131         If sort_keys is true, then the output of dictionaries will be
 132         sorted by key; this is useful for regression tests to ensure
 133         that JSON serializations can be compared on a day-to-day basis.
 134
 135         If indent is a string, then JSON array elements and object members
 136         will be pretty-printed with a newline followed by that string repeated
 137         for each level of nesting. ``None`` (the default) selects the most compact
 138         representation without any newlines. For backwards compatibility with
 139         versions of simplejson earlier than 2.1.0, an integer is also accepted
 140         and is converted to a string with that many spaces.
 141
 142         If specified, separators should be a (item_separator, key_separator)
 143         tuple.  The default is (', ', ': ').  To get the most compact JSON
 144         representation you should specify (',', ':') to eliminate whitespace.
 145
 146         If specified, default is a function that gets called for objects
 147         that can't otherwise be serialized.  It should return a JSON encodable
 148         version of the object or raise a ``TypeError``.
 149
 150         If encoding is not None, then all input strings will be
 151         transformed into unicode using that encoding prior to JSON-encoding.
 152         The default is UTF-8.
 153
 154         If use_decimal is true (not the default), ``decimal.Decimal`` will
 155         be supported directly by the encoder. For the inverse, decode JSON
 156         with ``parse_float=decimal.Decimal``.
 157
 158         If namedtuple_as_object is true (the default), objects with
 159         ``_asdict()`` methods will be encoded as JSON objects.
 160
 161         If tuple_as_array is true (the default), tuple (and subclasses) will
 162         be encoded as JSON arrays.
 163         """
 164
 165         self.skipkeys = skipkeys
 166         self.ensure_ascii = ensure_ascii
 167         self.check_circular = check_circular
 168         self.allow_nan = allow_nan
 169         self.sort_keys = sort_keys
 170         self.use_decimal = use_decimal
 171         self.namedtuple_as_object = namedtuple_as_object
 172         self.tuple_as_array = tuple_as_array
 173         if isinstance(indent, (int, long)):
 174             indent = ' ' * indent
 175         self.indent = indent
 176         if separators is not None:
 177             self.item_separator, self.key_separator = separators
 178         elif indent is not None:
 179             self.item_separator = ','
 180         if default is not None:
 181             self.default = default
 182         self.encoding = encoding
 183
 184     def default(self, o):
 185         """Implement this method in a subclass such that it returns
 186         a serializable object for ``o``, or calls the base implementation
 187         (to raise a ``TypeError``).
 188
 189         For example, to support arbitrary iterators, you could
 190         implement default like this::
 191
 192             def default(self, o):
 193                 try:
 194                     iterable = iter(o)
 195                 except TypeError:
 196                     pass
 197                 else:
 198                     return list(iterable)
 199                 return JSONEncoder.default(self, o)
 200
 201         """
 202         raise TypeError(repr(o) + " is not JSON serializable")
 203
 204     def encode(self, o):
 205         """Return a JSON string representation of a Python data structure.
 206
 207         >>> from simplejson import JSONEncoder
 208         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
 209         '{"foo": ["bar", "baz"]}'
 210
 211         """
 212         # This is for extremely simple cases and benchmarks.
 213         if isinstance(o, basestring):
 214             if isinstance(o, str):
 215                 _encoding = self.encoding
 216                 if (_encoding is not None
 217                         and not (_encoding == 'utf-8')):
 218                     o = o.decode(_encoding)
 219             if self.ensure_ascii:
 220                 return encode_basestring_ascii(o)
 221             else:
 222                 return encode_basestring(o)
 223         # This doesn't pass the iterator directly to ''.join() because the
 224         # exceptions aren't as detailed.  The list call should be roughly
 225         # equivalent to the PySequence_Fast that ''.join() would do.
 226         chunks = self.iterencode(o, _one_shot=True)
 227         if not isinstance(chunks, (list, tuple)):
 228             chunks = list(chunks)
 229         if self.ensure_ascii:
 230             return ''.join(chunks)
 231         else:
 232             return u''.join(chunks)
 233
 234     def iterencode(self, o, _one_shot=False):
 235         """Encode the given object and yield each string
 236         representation as available.
 237
 238         For example::
 239
 240             for chunk in JSONEncoder().iterencode(bigobject):
 241                 mysocket.write(chunk)
 242
 243         """
 244         if self.check_circular:
 245             markers = {}
 246         else:
 247             markers = None
 248         if self.ensure_ascii:
 249             _encoder = encode_basestring_ascii
 250         else:
 251             _encoder = encode_basestring
 252         if self.encoding != 'utf-8':
 253             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
 254                 if isinstance(o, str):
 255                     o = o.decode(_encoding)
 256                 return _orig_encoder(o)
 257
 258         def floatstr(o, allow_nan=self.allow_nan,
 259                 _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
 260             # Check for specials. Note that this type of test is processor
 261             # and/or platform-specific, so do tests which don't depend on
 262             # the internals.
 263
 264             if o != o:
 265                 text = 'NaN'
 266             elif o == _inf:
 267                 text = 'Infinity'
 268             elif o == _neginf:
 269                 text = '-Infinity'
 270             else:
 271                 return _repr(o)
 272
 273             if not allow_nan:
 274                 raise ValueError(
 275                     "Out of range float values are not JSON compliant: " +
 276                     repr(o))
 277
 278             return text
 279
 280
 281         key_memo = {}
 282         if (_one_shot and c_make_encoder is not None
 283                 and self.indent is None):
 284             _iterencode = c_make_encoder(
 285                 markers, self.default, _encoder, self.indent,
 286                 self.key_separator, self.item_separator, self.sort_keys,
 287                 self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
 288                 self.namedtuple_as_object, self.tuple_as_array)
 289         else:
 290             _iterencode = _make_iterencode(
 291                 markers, self.default, _encoder, self.indent, floatstr,
 292                 self.key_separator, self.item_separator, self.sort_keys,
 293                 self.skipkeys, _one_shot, self.use_decimal,
 294                 self.namedtuple_as_object, self.tuple_as_array)
 295         try:
 296             return _iterencode(o, 0)
 297         finally:
 298             key_memo.clear()
 299
 300
 301 class JSONEncoderForHTML(JSONEncoder):
 302     """An encoder that produces JSON safe to embed in HTML.
 303
 304     To embed JSON content in, say, a script tag on a web page, the
 305     characters &, < and > should be escaped. They cannot be escaped
 306     with the usual entities (e.g. &amp;) because they are not expanded
 307     within <script> tags.
 308     """
 309
 310     def encode(self, o):
 311         # Override JSONEncoder.encode because it has hacks for
 312         # performance that make things more complicated.
 313         chunks = self.iterencode(o, True)
 314         if self.ensure_ascii:
 315             return ''.join(chunks)
 316         else:
 317             return u''.join(chunks)
 318
 319     def iterencode(self, o, _one_shot=False):
 320         chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
 321         for chunk in chunks:
 322             chunk = chunk.replace('&', '\\u0026')
 323             chunk = chunk.replace('<', '\\u003c')
 324             chunk = chunk.replace('>', '\\u003e')
 325             yield chunk
 326
 327
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal, _namedtuple_as_object, _tuple_as_array,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build the pure-Python encoder and return its entry point.

    The returned ``_iterencode(o, _current_indent_level)`` is a generator
    function that yields the JSON text for ``o`` as a sequence of string
    chunks.

    markers -- dict used to detect circular references (keyed by ``id()``
        of each container being encoded), or None to disable the check.
    _default -- called for objects no other branch handles; its return
        value is encoded in place of the object.
    _encoder -- called to turn string values and dict keys into JSON
        string literals.
    _indent -- string repeated once per nesting level for pretty-printing,
        or None for compact output.
    _floatstr -- called to render float values and float keys.
    _key_separator, _item_separator -- text emitted between a key and its
        value, and between consecutive items.
    _sort_keys -- if true, dict items are emitted sorted by key.
    _skipkeys -- if true, keys of unsupported types are skipped instead of
        raising TypeError.
    _one_shot -- accepted but not referenced in this function's body.
    _use_decimal -- if true, ``Decimal`` values are emitted via ``str()``.
    _namedtuple_as_object -- if true, objects with a callable ``_asdict``
        attribute are encoded as JSON objects from ``_asdict()``.
    _tuple_as_array -- if true, tuples are encoded as JSON arrays.

    The remaining keyword parameters only rebind builtins as locals (see
    the HACK note in the signature).
    """

    def _iterencode_list(lst, _current_indent_level):
        """Yield the chunks of a JSON array for the sequence ``lst``."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            # Remember this container for the duration of its encoding so
            # that meeting it again further down signals a cycle.
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # buf holds the text emitted in front of the next element: the
        # opening bracket for the first one, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Composite or unknown value: emit the prefix on its own,
                # then stream the chunks of the nested encoding.
                yield buf
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    # The _asdict test comes before the tuple test, so an
                    # object exposing _asdict() is encoded as an object.
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            # Encoding of this container is finished; forget it again.
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the chunks of a JSON object for the mapping ``dct``."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            # Same cycle bookkeeping as in _iterencode_list.
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            # Normalise the key to a string before it is passed to _encoder.
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            # The separator is emitted only after a key has been accepted,
            # so skipped keys leave no stray separator behind.
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                # Composite or unknown value: stream the nested encoding.
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON chunks for an arbitrary object ``o``.

        Objects that no branch recognises are passed to ``_default`` and
        the value it returns is encoded instead.
        """
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, list):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        else:
            _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
            if _asdict and callable(_asdict):
                for chunk in _iterencode_dict(_asdict(), _current_indent_level):
                    yield chunk
            elif (_tuple_as_array and isinstance(o, tuple)):
                for chunk in _iterencode_list(o, _current_indent_level):
                    yield chunk
            elif isinstance(o, dict):
                for chunk in _iterencode_dict(o, _current_indent_level):
                    yield chunk
            elif _use_decimal and isinstance(o, Decimal):
                yield str(o)
            else:
                if markers is not None:
                    # Track the original object so that a _default() which
                    # leads back to it is reported as a circular reference.
                    markerid = id(o)
                    if markerid in markers:
                        raise ValueError("Circular reference detected")
                    markers[markerid] = o
                o = _default(o)
                for chunk in _iterencode(o, _current_indent_level):
                    yield chunk
                if markers is not None:
                    del markers[markerid]

    return _iterencode