2 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
5 * Converts to and from JSON format.
7 * JSON (JavaScript Object Notation) is a lightweight data-interchange
8 * format. It is easy for humans to read and write. It is easy for machines
9 * to parse and generate. It is based on a subset of the JavaScript
10 * Programming Language, Standard ECMA-262 3rd Edition - December 1999.
11 * This feature can also be found in Python. JSON is a text format that is
12 * completely language independent but uses conventions that are familiar
13 * to programmers of the C-family of languages, including C, C++, C#, Java,
14 * JavaScript, Perl, TCL, and many others. These properties make JSON an
15 * ideal data-interchange language.
17 * This package provides a simple encoder and decoder for JSON notation. It
18 * is intended for use with client-side Javascript applications that make
19 * use of HTTPRequest to perform server communication functions - data can
20 * be encoded into JSON notation for use in a client-side javascript, or
21 * decoded from incoming Javascript requests. JSON format is native to
22 * Javascript, and can be directly eval()'ed with no further parsing
25 * All strings should be in ASCII or UTF-8 format!
27 * LICENSE: Redistribution and use in source and binary forms, with or
28 * without modification, are permitted provided that the following
29 * conditions are met: Redistributions of source code must retain the
30 * above copyright notice, this list of conditions and the following
31 * disclaimer. Redistributions in binary form must reproduce the above
32 * copyright notice, this list of conditions and the following disclaimer
33 * in the documentation and/or other materials provided with the
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
38 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
39 * NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
40 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
41 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
42 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
44 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
45 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
49 * @package Services_JSON
50 * @author Michal Migurski <mike-json@teczno.com>
51 * @author Matt Knapp <mdknapp[at]gmail[dot]com>
52 * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
53 * @copyright 2005 Michal Migurski
55 * @license http://www.opensource.org/licenses/bsd-license.php
56 * @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198
60 * Marker constant for Services_JSON::decode(), used to flag stack state
62 define('SERVICES_JSON_SLICE', 1);
65 * Marker constant for Services_JSON::decode(), used to flag stack state
67 define('SERVICES_JSON_IN_STR', 2);
70 * Marker constant for Services_JSON::decode(), used to flag stack state
72 define('SERVICES_JSON_IN_ARR', 3);
75 * Marker constant for Services_JSON::decode(), used to flag stack state
77 define('SERVICES_JSON_IN_OBJ', 4);
80 * Marker constant for Services_JSON::decode(), used to flag stack state
82 define('SERVICES_JSON_IN_CMT', 5);
85 * Behavior switch for Services_JSON::decode()
87 define('SERVICES_JSON_LOOSE_TYPE', 16);
90 * Behavior switch for Services_JSON::decode()
92 define('SERVICES_JSON_SUPPRESS_ERRORS', 32);
95 * Converts to and from JSON format.
97 * Brief example of use:
100 * // create a new instance of Services_JSON
101 * $json = new Services_JSON();
103 * // convert a complex value to JSON notation, and send it to the browser
104 * $value = array('foo', 'bar', array(1, 2, 'baz'), array(3, array(4)));
105 * $output = $json->encode($value);
108 * // prints: ["foo","bar",[1,2,"baz"],[3,[4]]]
110 * // accept incoming POST data, assumed to be in JSON notation
111 * $input = file_get_contents('php://input', 1000000);
112 * $value = $json->decode($input);
118 * constructs a new JSON instance
120 * @param int $use object behavior flags; combine with bitwise-OR
123 * - SERVICES_JSON_LOOSE_TYPE: loose typing.
124 * "{...}" syntax creates associative arrays
125 * instead of objects in decode().
126 * - SERVICES_JSON_SUPPRESS_ERRORS: error suppression.
127 * Values which can't be encoded (e.g. resources)
128 * appear as NULL instead of throwing errors.
129 * By default, a deeply-nested resource will
130 * bubble up with an error, so all return values
131 * from encode() should be checked with isError()
133 function Services_JSON($use = 0)
139 * convert a string from one UTF-16 char to one UTF-8 char
141 * Normally should be handled by mb_convert_encoding, but
142 * provides a slower PHP-only method for installations
143 * that lack the multibyte string extension.
145 * @param string $utf16 UTF-16 character
146 * @return string UTF-8 character
149 function utf162utf8($utf16)
151 // oh please oh please oh please oh please oh please
152 if(function_exists('mb_convert_encoding')) {
153 return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
156 $bytes = (ord($utf16{0}) << 8) |
ord($utf16{1});
159 case ((0x7F & $bytes) == $bytes):
160 // this case should never be reached, because we are in ASCII range
161 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
162 return chr(0x7F & $bytes);
164 case (0x07FF & $bytes) == $bytes:
165 // return a 2-byte UTF-8 character
166 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
167 return chr(0xC0 |
(($bytes >> 6) & 0x1F))
168 . chr(0x80 |
($bytes & 0x3F));
170 case (0xFFFF & $bytes) == $bytes:
171 // return a 3-byte UTF-8 character
172 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
173 return chr(0xE0 |
(($bytes >> 12) & 0x0F))
174 . chr(0x80 |
(($bytes >> 6) & 0x3F))
175 . chr(0x80 |
($bytes & 0x3F));
178 // ignoring UTF-32 for now, sorry
183 * convert a string from one UTF-8 char to one UTF-16 char
185 * Normally should be handled by mb_convert_encoding, but
186 * provides a slower PHP-only method for installations
187 * that lack the multibyte string extension.
189 * @param string $utf8 UTF-8 character
190 * @return string UTF-16 character
193 function utf82utf16($utf8)
195 // oh please oh please oh please oh please oh please
196 if(function_exists('mb_convert_encoding')) {
197 return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
200 switch(strlen($utf8)) {
202 // this case should never be reached, because we are in ASCII range
203 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
207 // return a UTF-16 character from a 2-byte UTF-8 char
208 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
209 return chr(0x07 & (ord($utf8{0}) >> 2))
210 . chr((0xC0 & (ord($utf8{0}) << 6))
211 |
(0x3F & ord($utf8{1})));
214 // return a UTF-16 character from a 3-byte UTF-8 char
215 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
216 return chr((0xF0 & (ord($utf8{0}) << 4))
217 |
(0x0F & (ord($utf8{1}) >> 2)))
218 . chr((0xC0 & (ord($utf8{1}) << 6))
219 |
(0x7F & ord($utf8{2})));
222 // ignoring UTF-32 for now, sorry
227 * encodes an arbitrary variable into JSON format
229 * @param mixed $var any number, boolean, string, array, or object to be encoded.
230 * see argument 1 to Services_JSON() above for array-parsing behavior.
231 * if var is a string, note that encode() always expects it
232 * to be in ASCII or UTF-8 format!
234 * @return mixed JSON string representation of input var or an error if a problem occurs
237 function encode($var)
239 switch (gettype($var)) {
241 return $var ?
'true' : 'false';
254 // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
256 $strlen_var = strlen($var);
259 * Iterate over every character in the string,
260 * escaping with a slash or encoding to UTF-8 where necessary
262 for ($c = 0; $c < $strlen_var; ++
$c) {
264 $ord_var_c = ord($var{$c});
267 case $ord_var_c == 0x08:
270 case $ord_var_c == 0x09:
273 case $ord_var_c == 0x0A:
276 case $ord_var_c == 0x0C:
279 case $ord_var_c == 0x0D:
283 case $ord_var_c == 0x22:
284 case $ord_var_c == 0x2F:
285 case $ord_var_c == 0x5C:
286 // double quote, slash, slosh
287 $ascii .= '\\'.$var{$c};
290 case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)):
291 // characters U-00000000 - U-0000007F (same as ASCII)
295 case (($ord_var_c & 0xE0) == 0xC0):
296 // characters U-00000080 - U-000007FF, mask 110XXXXX
297 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
298 $char = pack('C*', $ord_var_c, ord($var{$c +
1}));
300 $utf16 = $this->utf82utf16($char);
301 $ascii .= sprintf('\u%04s', bin2hex($utf16));
304 case (($ord_var_c & 0xF0) == 0xE0):
305 // characters U-00000800 - U-0000FFFF, mask 1110XXXX
306 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
307 $char = pack('C*', $ord_var_c,
311 $utf16 = $this->utf82utf16($char);
312 $ascii .= sprintf('\u%04s', bin2hex($utf16));
315 case (($ord_var_c & 0xF8) == 0xF0):
316 // characters U-00010000 - U-001FFFFF, mask 11110XXX
317 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
318 $char = pack('C*', $ord_var_c,
323 $utf16 = $this->utf82utf16($char);
324 $ascii .= sprintf('\u%04s', bin2hex($utf16));
327 case (($ord_var_c & 0xFC) == 0xF8):
328 // characters U-00200000 - U-03FFFFFF, mask 111110XX
329 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
330 $char = pack('C*', $ord_var_c,
336 $utf16 = $this->utf82utf16($char);
337 $ascii .= sprintf('\u%04s', bin2hex($utf16));
340 case (($ord_var_c & 0xFE) == 0xFC):
341 // characters U-04000000 - U-7FFFFFFF, mask 1111110X
342 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
343 $char = pack('C*', $ord_var_c,
350 $utf16 = $this->utf82utf16($char);
351 $ascii .= sprintf('\u%04s', bin2hex($utf16));
356 return '"'.$ascii.'"';
360 * As per JSON spec if any array key is not an integer
361 * we must treat the whole array as an object. We
362 * also try to catch a sparsely populated associative
363 * array with numeric keys here because some JS engines
364 * will create an array with empty indexes up to
365 * max_index which can cause memory issues and because
366 * the keys, which may be relevant, will be remapped
369 * As per the ECMA and JSON specification an object may
370 * have any string as a property. Unfortunately due to
371 * a hole in the ECMA specification if the key is a
372 * ECMA reserved word or starts with a digit the
373 * parameter is only accessible using ECMAScript's
377 // treat as a JSON object
378 if (is_array($var) && count($var) && (array_keys($var) !== range(0, sizeof($var) - 1))) {
379 $properties = array_map(array($this, 'name_value'),
383 foreach($properties as $property) {
384 if(Services_JSON
::isError($property)) {
389 return '{' . join(',', $properties) . '}';
392 // treat it like a regular array
393 $elements = array_map(array($this, 'encode'), $var);
395 foreach($elements as $element) {
396 if(Services_JSON
::isError($element)) {
401 return '[' . join(',', $elements) . ']';
404 $vars = get_object_vars($var);
406 $properties = array_map(array($this, 'name_value'),
408 array_values($vars));
410 foreach($properties as $property) {
411 if(Services_JSON
::isError($property)) {
416 return '{' . join(',', $properties) . '}';
419 return ($this->use & SERVICES_JSON_SUPPRESS_ERRORS
)
421 : new Services_JSON_Error(gettype($var)." can not be encoded as JSON string");
426 * array-walking function for use in generating JSON-formatted name-value pairs
428 * @param string $name name of key to use
429 * @param mixed $value reference to an array element to be encoded
431 * @return string JSON-formatted name-value pair, like '"name":value'
434 function name_value($name, $value)
436 $encoded_value = $this->encode($value);
438 if(Services_JSON
::isError($encoded_value)) {
439 return $encoded_value;
442 return $this->encode(strval($name)) . ':' . $encoded_value;
446 * reduce a string by removing leading and trailing comments and whitespace
448 * @param string $str string value to strip of comments and whitespace
450 * @return string string value stripped of comments and whitespace
453 function reduce_string($str)
455 $str = preg_replace(array(
457 // eliminate single line comments in '// ...' form
460 // eliminate multi-line comments in '/* ... */' form, at start of string
461 '#^\s*/\*(.+)\*/#Us',
463 // eliminate multi-line comments in '/* ... */' form, at end of string
468 // eliminate extraneous space
473 * decodes a JSON string into appropriate variable
475 * @param string $str JSON-formatted string
477 * @return mixed number, boolean, string, array, or object
478 * corresponding to given JSON input string.
479 * See argument 1 to Services_JSON() above for object-output behavior.
480 * Note that decode() always returns strings
481 * in ASCII or UTF-8 format!
484 function decode($str)
486 $str = $this->reduce_string($str);
488 switch (strtolower($str)) {
501 if (is_numeric($str)) {
502 // Lookie-loo, it's a number
504 // This would work on its own, but I'm trying to be
505 // good about returning integers where appropriate:
506 // return (float)$str;
508 // Return float or int, as appropriate
509 return ((float)$str == (integer)$str)
513 } elseif (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
514 // STRINGS RETURNED IN UTF-8 FORMAT
515 $delim = substr($str, 0, 1);
516 $chrs = substr($str, 1, -1);
518 $strlen_chrs = strlen($chrs);
520 for ($c = 0; $c < $strlen_chrs; ++
$c) {
522 $substr_chrs_c_2 = substr($chrs, $c, 2);
523 $ord_chrs_c = ord($chrs{$c});
526 case $substr_chrs_c_2 == '\b':
530 case $substr_chrs_c_2 == '\t':
534 case $substr_chrs_c_2 == '\n':
538 case $substr_chrs_c_2 == '\f':
542 case $substr_chrs_c_2 == '\r':
547 case $substr_chrs_c_2 == '\\"':
548 case $substr_chrs_c_2 == '\\\'':
549 case $substr_chrs_c_2 == '\\\\':
550 case $substr_chrs_c_2 == '\\/':
551 if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
552 ($delim == "'" && $substr_chrs_c_2 != '\\"')) {
553 $utf8 .= $chrs{++
$c};
557 case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)):
558 // single, escaped unicode character
559 $utf16 = chr(hexdec(substr($chrs, ($c +
2), 2)))
560 . chr(hexdec(substr($chrs, ($c +
4), 2)));
561 $utf8 .= $this->utf162utf8($utf16);
565 case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
569 case ($ord_chrs_c & 0xE0) == 0xC0:
570 // characters U-00000080 - U-000007FF, mask 110XXXXX
571 //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
572 $utf8 .= substr($chrs, $c, 2);
576 case ($ord_chrs_c & 0xF0) == 0xE0:
577 // characters U-00000800 - U-0000FFFF, mask 1110XXXX
578 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
579 $utf8 .= substr($chrs, $c, 3);
583 case ($ord_chrs_c & 0xF8) == 0xF0:
584 // characters U-00010000 - U-001FFFFF, mask 11110XXX
585 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
586 $utf8 .= substr($chrs, $c, 4);
590 case ($ord_chrs_c & 0xFC) == 0xF8:
591 // characters U-00200000 - U-03FFFFFF, mask 111110XX
592 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
593 $utf8 .= substr($chrs, $c, 5);
597 case ($ord_chrs_c & 0xFE) == 0xFC:
598 // characters U-04000000 - U-7FFFFFFF, mask 1111110X
599 // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
600 $utf8 .= substr($chrs, $c, 6);
610 } elseif (preg_match('/^\[.*\]$/s', $str) ||
preg_match('/^\{.*\}$/s', $str)) {
611 // array, or object notation
613 if ($str{0} == '[') {
614 $stk = array(SERVICES_JSON_IN_ARR
);
617 if ($this->use & SERVICES_JSON_LOOSE_TYPE
) {
618 $stk = array(SERVICES_JSON_IN_OBJ
);
621 $stk = array(SERVICES_JSON_IN_OBJ
);
622 $obj = new stdClass();
626 array_push($stk, array('what' => SERVICES_JSON_SLICE
,
630 $chrs = substr($str, 1, -1);
631 $chrs = $this->reduce_string($chrs);
634 if (reset($stk) == SERVICES_JSON_IN_ARR
) {
643 //print("\nparsing {$chrs}\n");
645 $strlen_chrs = strlen($chrs);
647 for ($c = 0; $c <= $strlen_chrs; ++
$c) {
650 $substr_chrs_c_2 = substr($chrs, $c, 2);
652 if (($c == $strlen_chrs) ||
(($chrs{$c} == ',') && ($top['what'] == SERVICES_JSON_SLICE
))) {
653 // found a comma that is not inside a string, array, etc.,
654 // OR we've reached the end of the character list
655 $slice = substr($chrs, $top['where'], ($c - $top['where']));
656 array_push($stk, array('what' => SERVICES_JSON_SLICE
, 'where' => ($c +
1), 'delim' => false));
657 //print("Found split at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
659 if (reset($stk) == SERVICES_JSON_IN_ARR
) {
660 // we are in an array, so just push an element onto the stack
661 array_push($arr, $this->decode($slice));
663 } elseif (reset($stk) == SERVICES_JSON_IN_OBJ
) {
664 // we are in an object, so figure
665 // out the property name and set an
666 // element in an associative array,
670 if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
672 $key = $this->decode($parts[1]);
673 $val = $this->decode($parts[2]);
675 if ($this->use & SERVICES_JSON_LOOSE_TYPE
) {
680 } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
681 // name:value pair, where name is unquoted
683 $val = $this->decode($parts[2]);
685 if ($this->use & SERVICES_JSON_LOOSE_TYPE
) {
694 } elseif ((($chrs{$c} == '"') ||
($chrs{$c} == "'")) && ($top['what'] != SERVICES_JSON_IN_STR
)) {
695 // found a quote, and we are not inside a string
696 array_push($stk, array('what' => SERVICES_JSON_IN_STR
, 'where' => $c, 'delim' => $chrs{$c}));
697 //print("Found start of string at {$c}\n");
699 } elseif (($chrs{$c} == $top['delim']) &&
700 ($top['what'] == SERVICES_JSON_IN_STR
) &&
701 ((strlen(substr($chrs, 0, $c)) - strlen(rtrim(substr($chrs, 0, $c), '\\'))) %
2 != 1)) {
702 // found a quote, we're in a string, and it's not escaped
703 // we know that it's not escaped because there is _not_ an
704 // odd number of backslashes at the end of the string so far
706 //print("Found end of string at {$c}: ".substr($chrs, $top['where'], (1 + 1 + $c - $top['where']))."\n");
708 } elseif (($chrs{$c} == '[') &&
709 in_array($top['what'], array(SERVICES_JSON_SLICE
, SERVICES_JSON_IN_ARR
, SERVICES_JSON_IN_OBJ
))) {
710 // found a left-bracket, and we are in an array, object, or slice
711 array_push($stk, array('what' => SERVICES_JSON_IN_ARR
, 'where' => $c, 'delim' => false));
712 //print("Found start of array at {$c}\n");
714 } elseif (($chrs{$c} == ']') && ($top['what'] == SERVICES_JSON_IN_ARR
)) {
715 // found a right-bracket, and we're in an array
717 //print("Found end of array at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
719 } elseif (($chrs{$c} == '{') &&
720 in_array($top['what'], array(SERVICES_JSON_SLICE
, SERVICES_JSON_IN_ARR
, SERVICES_JSON_IN_OBJ
))) {
721 // found a left-brace, and we are in an array, object, or slice
722 array_push($stk, array('what' => SERVICES_JSON_IN_OBJ
, 'where' => $c, 'delim' => false));
723 //print("Found start of object at {$c}\n");
725 } elseif (($chrs{$c} == '}') && ($top['what'] == SERVICES_JSON_IN_OBJ
)) {
726 // found a right-brace, and we're in an object
728 //print("Found end of object at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
730 } elseif (($substr_chrs_c_2 == '/*') &&
731 in_array($top['what'], array(SERVICES_JSON_SLICE
, SERVICES_JSON_IN_ARR
, SERVICES_JSON_IN_OBJ
))) {
732 // found a comment start, and we are in an array, object, or slice
733 array_push($stk, array('what' => SERVICES_JSON_IN_CMT
, 'where' => $c, 'delim' => false));
735 //print("Found start of comment at {$c}\n");
737 } elseif (($substr_chrs_c_2 == '*/') && ($top['what'] == SERVICES_JSON_IN_CMT
)) {
738 // found a comment end, and we're in one now
742 for ($i = $top['where']; $i <= $c; ++
$i)
743 $chrs = substr_replace($chrs, ' ', $i, 1);
745 //print("Found end of comment at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
751 if (reset($stk) == SERVICES_JSON_IN_ARR
) {
754 } elseif (reset($stk) == SERVICES_JSON_IN_OBJ
) {
764 * @todo Ultimately, this should just call PEAR::isError()
766 function isError($data, $code = null)
768 if (class_exists('pear')) {
769 return PEAR
::isError($data, $code);
770 } elseif (is_object($data) && (get_class($data) == 'services_json_error' ||
771 is_subclass_of($data, 'services_json_error'))) {
779 if (class_exists('PEAR_Error')) {
781 class Services_JSON_Error
extends PEAR_Error
783 function Services_JSON_Error($message = 'unknown error', $code = null,
784 $mode = null, $options = null, $userinfo = null)
786 parent
::PEAR_Error($message, $code, $mode, $options, $userinfo);
793 * @todo Ultimately, this class shall be descended from PEAR_Error
795 class Services_JSON_Error
797 function Services_JSON_Error($message = 'unknown error', $code = null,
798 $mode = null, $options = null, $userinfo = null)