feedservice/simplejson/encoder.py

   1 """Implementation of JSONEncoder
   2 """
   3 import re
   4
   5 try:
   6     from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
   7 except ImportError:
   8     c_encode_basestring_ascii = None
   9 try:
  10     from simplejson._speedups import make_encoder as c_make_encoder
  11 except ImportError:
  12     c_make_encoder = None
  13
  14 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
  15 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
  16 HAS_UTF8 = re.compile(r'[\x80-\xff]')
  17 ESCAPE_DCT = {
  18     '\\': '\\\\',
  19     '"': '\\"',
  20     '\b': '\\b',
  21     '\f': '\\f',
  22     '\n': '\\n',
  23     '\r': '\\r',
  24     '\t': '\\t',
  25 }
  26 for i in range(0x20):
  27     #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
  28     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
  29
  30 # Assume this produces an infinity on all machines (probably not guaranteed)
  31 INFINITY = float('1e66666')
  32 FLOAT_REPR = repr
  33
  34 def encode_basestring(s):
  35     """Return a JSON representation of a Python string
  36
  37     """
  38     def replace(match):
  39         return ESCAPE_DCT[match.group(0)]
  40     return '"' + ESCAPE.sub(replace, s) + '"'
  41
  42
  43 def py_encode_basestring_ascii(s):
  44     """Return an ASCII-only JSON representation of a Python string
  45
  46     """
  47     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
  48         s = s.decode('utf-8')
  49     def replace(match):
  50         s = match.group(0)
  51         try:
  52             return ESCAPE_DCT[s]
  53         except KeyError:
  54             n = ord(s)
  55             if n < 0x10000:
  56                 #return '\\u{0:04x}'.format(n)
  57                 return '\\u%04x' % (n,)
  58             else:
  59                 # surrogate pair
  60                 n -= 0x10000
  61                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
  62                 s2 = 0xdc00 | (n & 0x3ff)
  63                 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
  64                 return '\\u%04x\\u%04x' % (s1, s2)
  65     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
  66
  67
  68 encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
  69
  70 class JSONEncoder(object):
  71     """Extensible JSON <http://json.org> encoder for Python data structures.
  72
  73     Supports the following objects and types by default:
  74
  75     +-------------------+---------------+
  76     | Python            | JSON          |
  77     +===================+===============+
  78     | dict              | object        |
  79     +-------------------+---------------+
  80     | list, tuple       | array         |
  81     +-------------------+---------------+
  82     | str, unicode      | string        |
  83     +-------------------+---------------+
  84     | int, long, float  | number        |
  85     +-------------------+---------------+
  86     | True              | true          |
  87     +-------------------+---------------+
  88     | False             | false         |
  89     +-------------------+---------------+
  90     | None              | null          |
  91     +-------------------+---------------+
  92
  93     To extend this to recognize other objects, subclass and implement a
  94     ``.default()`` method with another method that returns a serializable
  95     object for ``o`` if possible, otherwise it should call the superclass
  96     implementation (to raise ``TypeError``).
  97
  98     """
  99     item_separator = ', '
 100     key_separator = ': '
 101     def __init__(self, skipkeys=False, ensure_ascii=True,
 102             check_circular=True, allow_nan=True, sort_keys=False,
 103             indent=None, separators=None, encoding='utf-8', default=None):
 104         """Constructor for JSONEncoder, with sensible defaults.
 105
 106         If skipkeys is false, then it is a TypeError to attempt
 107         encoding of keys that are not str, int, long, float or None.  If
 108         skipkeys is True, such items are simply skipped.
 109
 110         If ensure_ascii is true, the output is guaranteed to be str
 111         objects with all incoming unicode characters escaped.  If
 112         ensure_ascii is false, the output will be unicode object.
 113
 114         If check_circular is true, then lists, dicts, and custom encoded
 115         objects will be checked for circular references during encoding to
 116         prevent an infinite recursion (which would cause an OverflowError).
 117         Otherwise, no such check takes place.
 118
 119         If allow_nan is true, then NaN, Infinity, and -Infinity will be
 120         encoded as such.  This behavior is not JSON specification compliant,
 121         but is consistent with most JavaScript based encoders and decoders.
 122         Otherwise, it will be a ValueError to encode such floats.
 123
 124         If sort_keys is true, then the output of dictionaries will be
 125         sorted by key; this is useful for regression tests to ensure
 126         that JSON serializations can be compared on a day-to-day basis.
 127
 128         If indent is a non-negative integer, then JSON array
 129         elements and object members will be pretty-printed with that
 130         indent level.  An indent level of 0 will only insert newlines.
 131         None is the most compact representation.
 132
 133         If specified, separators should be a (item_separator, key_separator)
 134         tuple.  The default is (', ', ': ').  To get the most compact JSON
 135         representation you should specify (',', ':') to eliminate whitespace.
 136
 137         If specified, default is a function that gets called for objects
 138         that can't otherwise be serialized.  It should return a JSON encodable
 139         version of the object or raise a ``TypeError``.
 140
 141         If encoding is not None, then all input strings will be
 142         transformed into unicode using that encoding prior to JSON-encoding.
 143         The default is UTF-8.
 144
 145         """
 146
 147         self.skipkeys = skipkeys
 148         self.ensure_ascii = ensure_ascii
 149         self.check_circular = check_circular
 150         self.allow_nan = allow_nan
 151         self.sort_keys = sort_keys
 152         self.indent = indent
 153         if separators is not None:
 154             self.item_separator, self.key_separator = separators
 155         if default is not None:
 156             self.default = default
 157         self.encoding = encoding
 158
 159     def default(self, o):
 160         """Implement this method in a subclass such that it returns
 161         a serializable object for ``o``, or calls the base implementation
 162         (to raise a ``TypeError``).
 163
 164         For example, to support arbitrary iterators, you could
 165         implement default like this::
 166
 167             def default(self, o):
 168                 try:
 169                     iterable = iter(o)
 170                 except TypeError:
 171                     pass
 172                 else:
 173                     return list(iterable)
 174                 return JSONEncoder.default(self, o)
 175
 176         """
 177         raise TypeError(repr(o) + " is not JSON serializable")
 178
 179     def encode(self, o):
 180         """Return a JSON string representation of a Python data structure.
 181
 182         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
 183         '{"foo": ["bar", "baz"]}'
 184
 185         """
 186         # This is for extremely simple cases and benchmarks.
 187         if isinstance(o, basestring):
 188             if isinstance(o, str):
 189                 _encoding = self.encoding
 190                 if (_encoding is not None
 191                         and not (_encoding == 'utf-8')):
 192                     o = o.decode(_encoding)
 193             if self.ensure_ascii:
 194                 return encode_basestring_ascii(o)
 195             else:
 196                 return encode_basestring(o)
 197         # This doesn't pass the iterator directly to ''.join() because the
 198         # exceptions aren't as detailed.  The list call should be roughly
 199         # equivalent to the PySequence_Fast that ''.join() would do.
 200         chunks = self.iterencode(o, _one_shot=True)
 201         if not isinstance(chunks, (list, tuple)):
 202             chunks = list(chunks)
 203         return ''.join(chunks)
 204
 205     def iterencode(self, o, _one_shot=False):
 206         """Encode the given object and yield each string
 207         representation as available.
 208
 209         For example::
 210
 211             for chunk in JSONEncoder().iterencode(bigobject):
 212                 mysocket.write(chunk)
 213
 214         """
 215         if self.check_circular:
 216             markers = {}
 217         else:
 218             markers = None
 219         if self.ensure_ascii:
 220             _encoder = encode_basestring_ascii
 221         else:
 222             _encoder = encode_basestring
 223         if self.encoding != 'utf-8':
 224             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
 225                 if isinstance(o, str):
 226                     o = o.decode(_encoding)
 227                 return _orig_encoder(o)
 228
 229         def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
 230             # Check for specials.  Note that this type of test is processor- and/or
 231             # platform-specific, so do tests which don't depend on the internals.
 232
 233             if o != o:
 234                 text = 'NaN'
 235             elif o == _inf:
 236                 text = 'Infinity'
 237             elif o == _neginf:
 238                 text = '-Infinity'
 239             else:
 240                 return _repr(o)
 241
 242             if not allow_nan:
 243                 raise ValueError(
 244                     "Out of range float values are not JSON compliant: " +
 245                     repr(o))
 246
 247             return text
 248
 249
 250         if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
 251             _iterencode = c_make_encoder(
 252                 markers, self.default, _encoder, self.indent,
 253                 self.key_separator, self.item_separator, self.sort_keys,
 254                 self.skipkeys, self.allow_nan)
 255         else:
 256             _iterencode = _make_iterencode(
 257                 markers, self.default, _encoder, self.indent, floatstr,
 258                 self.key_separator, self.item_separator, self.sort_keys,
 259                 self.skipkeys, _one_shot)
 260         return _iterencode(o, 0)
 261
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, level)`` generator.

    The returned function yields the JSON text for ``o`` as a sequence of
    string chunks.  The three nested generators close over the arguments:

    ``markers``
        A dict used for circular-reference detection (keyed by ``id()`` of
        each container or custom object currently being encoded), or None
        to disable the check.
    ``_default``
        Called for objects of unsupported type; its result is encoded in
        place of the original object.
    ``_encoder``
        Called to render string values and all dict keys.
    ``_indent``
        None for compact output; otherwise the number of spaces per
        nesting level used for pretty-printing.
    ``_floatstr``
        Called to render float values and float dict keys.
    ``_key_separator`` / ``_item_separator``
        Strings emitted between a key and its value, and between items.
    ``_sort_keys``
        If true, dict items are emitted sorted by key.
    ``_skipkeys``
        If true, dict items with unsupported key types are skipped instead
        of raising ``TypeError``.
    ``_one_shot``
        Accepted for call compatibility; not used inside this function.

    The remaining keyword parameters only rebind builtins as locals for
    speed and are not meant to be passed by callers.  Note that binding
    ``True``/``False`` as parameter names and the use of ``basestring``,
    ``long`` and ``dict.iteritems`` make this Python 2 code.

    Raises ``ValueError`` on a circular reference (when ``markers`` is not
    None) and ``TypeError`` for unsupported dict keys (unless
    ``_skipkeys``).
    """

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON chunks for a list or tuple."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            # Register this container so a nested reference to it is caught.
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the pending prefix: the opening bracket (plus
        # indentation) before the first element, the separator afterwards.
        # It is emitted together with the element so scalars cost one chunk.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            # True/False are tested by identity before the int check
            # because bool is a subclass of int.
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                # Container or custom object: flush the prefix, then
                # delegate to the matching nested generator.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing bracket goes on its own line at the outer indent level.
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            # Done with this container; it may legitimately appear again
            # elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON chunks for a dict."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            # Register this container so a nested reference to it is caught.
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            # Sort on the original (uncoerced) keys.
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            # Coerce non-string keys to their JSON text; the result is
            # still passed through ``_encoder`` below and so ends up quoted.
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                # Skipped before the separator logic, so no stray
                # separator is emitted for this item.
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            # Identity tests precede the int check: bool subclasses int.
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                # Container or custom object: delegate to the matching
                # nested generator.
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing brace goes on its own line at the outer indent level.
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON chunks for an arbitrary object (the entry point)."""
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        # Identity tests precede the int check: bool subclasses int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            # Unsupported type: convert it with ``_default`` and encode the
            # result.  The original object is tracked in ``markers`` so a
            # ``_default`` that keeps returning it is reported as circular.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode