From 79f80cc49571e286b89fc1666c034262611fac7f Mon Sep 17 00:00:00 2001 From: Kevin Israel Date: Wed, 20 Feb 2013 21:30:45 -0500 Subject: [PATCH] Combine JavaScript and JSON encoding logic This will help with improving human readability of JS and JSON objects encoded by both ResourceLoader and the API. This patch also adds a new "utf8" parameter to the JSON formatter of the API. Changes to FormatJson class: * Added escaping of '<', '>', and '&' by default to protect against XSS. * Removed unnecessary escaping of '/' and added an additional option to unescape non-ASCII characters (those above U+007F) as well. * Added PHP 5.3 pretty printing code (to replace Services_JSON) that uses a four-space indent as PHP 5.4 does. Changes to Xml class: * Defined Xml::encodeJsVar() in terms of FormatJson::encode() and added a pretty printing option. Also added a pretty printing option to Xml::encodeJsCall(). * Deprecated Xml::escapeJsString() and QuickTemplate::jstext(); callers have to add quotes themselves, hence the escaping of both double quotes and apostrophes. 
Bug: 26818 Change-Id: I1987190f1ba5bf41738e7bd611209706c1f6bb5c --- RELEASE-NOTES-1.21 | 14 + includes/Article.php | 5 +- includes/AutoLoader.php | 2 - includes/SkinTemplate.php | 2 + includes/Xml.php | 80 +- includes/api/ApiFormatJson.php | 24 +- includes/json/FormatJson.php | 179 ++++- includes/json/Services_JSON.php | 882 --------------------- includes/resourceloader/ResourceLoader.php | 6 +- .../ResourceLoaderUserOptionsModule.php | 4 +- .../ResourceLoaderUserTokensModule.php | 4 +- tests/phpunit/includes/JsonTest.php | 27 - tests/phpunit/includes/json/FormatJsonTest.php | 161 ++++ tests/phpunit/includes/json/ServicesJsonTest.php | 93 --- tests/qunit/data/load.mock.php | 3 +- 15 files changed, 390 insertions(+), 1096 deletions(-) delete mode 100644 includes/json/Services_JSON.php delete mode 100644 tests/phpunit/includes/JsonTest.php create mode 100644 tests/phpunit/includes/json/FormatJsonTest.php delete mode 100644 tests/phpunit/includes/json/ServicesJsonTest.php diff --git a/RELEASE-NOTES-1.21 b/RELEASE-NOTES-1.21 index 8844f9b1458..efa534a2f9a 100644 --- a/RELEASE-NOTES-1.21 +++ b/RELEASE-NOTES-1.21 @@ -281,6 +281,9 @@ production. to action=feedwatchlist * WDDX formatted output will actually be formatted (and normal output will no longer be), and will no longer choke on booleans. +* The JSON output formatter now leaves forward slashes unescaped to improve human + readability of URLs and similar strings. Also, a "utf8" option is now provided + to use UTF-8 encoding instead of hex escape codes for most non-ASCII characters. === API internal changes in 1.21 === * For debugging only, a new global $wgDebugAPI removes many API restrictions when true. @@ -336,6 +339,17 @@ changes to languages because of Bugzilla reports. * BREAKING CHANGE: (bug 38244) Removed the mediawiki.api.titleblacklist module and moved it to the TitleBlacklist extension. 
* The Special:ActiveUsers special page was removed +* BREAKING CHANGE: Implementation of MediaWiki's JS and JSON value encoding + has changed: +** MediaWiki no longer supports PHP installations in which the native JSON + extension is missing or disabled. +** XmlJsCode objects can no longer be nested inside objects or arrays. + (For Xml::encodeJsCall(), this individually applies to each argument.) +** The sets of characters escaped by default, along with the precise escape + sequences used, have changed (except for the Xml::escapeJsString() + function, which is now deprecated). +* BREAKING CHANGE: The Services_JSON class has been removed; if necessary, + be sure to upgrade affected extensions at the same time (e.g. Collection). == Compatibility == diff --git a/includes/Article.php b/includes/Article.php index 793da7f75a0..d2f52bcf0e0 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -988,8 +988,9 @@ class Article implements Page { // Set the fragment if one was specified in the redirect if ( strval( $this->getTitle()->getFragment() ) != '' ) { - $fragment = Xml::escapeJsString( $this->getTitle()->getFragmentForURL() ); - $outputPage->addInlineScript( "redirectToFragment(\"$fragment\");" ); + $outputPage->addInlineScript( Xml::encodeJsCall( + 'redirectToFragment', array( $this->getTitle()->getFragmentForURL() ) + ) ); } // Add a tag diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 7b6b3e071ca..0b3c788d618 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -680,8 +680,6 @@ $wgAutoloadLocalClasses = array( # includes/json 'FormatJson' => 'includes/json/FormatJson.php', - 'Services_JSON' => 'includes/json/Services_JSON.php', - 'Services_JSON_Error' => 'includes/json/Services_JSON.php', # includes/libs 'CSSJanus' => 'includes/libs/CSSJanus.php', diff --git a/includes/SkinTemplate.php b/includes/SkinTemplate.php index 7147ba97dd3..fee3058ac15 100644 --- a/includes/SkinTemplate.php +++ b/includes/SkinTemplate.php @@ 
-1322,8 +1322,10 @@ abstract class QuickTemplate { /** * @private + * @deprecated since 1.21; use Xml::encodeJsVar() or Xml::encodeJsCall() instead */ function jstext( $str ) { + wfDeprecated( __METHOD__, '1.21' ); echo Xml::escapeJsString( $this->data[$str] ); } diff --git a/includes/Xml.php b/includes/Xml.php index 8697962627f..7cecf3af2ee 100644 --- a/includes/Xml.php +++ b/includes/Xml.php @@ -590,6 +590,7 @@ class Xml { * for JavaScript source code. * Illegal control characters are assumed not to be present. * + * @deprecated since 1.21; use Xml::encodeJsVar() or Xml::encodeJsCall() instead * @param string $string to escape * @return String */ @@ -621,72 +622,46 @@ class Xml { } /** - * Encode a variable of unknown type to JavaScript. - * Arrays are converted to JS arrays, objects are converted to JS associative - * arrays (objects). So cast your PHP associative arrays to objects before - * passing them to here. + * Encode a variable of arbitrary type to JavaScript. + * If the value is an XmlJsCode object, pass through the object's value verbatim. * - * @param $value + * @note Only use this function for generating JavaScript code. If generating output + * for a proper JSON parser, just call FormatJson::encode() directly. * - * @return string + * @param mixed $value The value being encoded. Can be any type except a resource. + * @param bool $pretty If true, add non-significant whitespace to improve readability. + * @return string|bool: String if successful; false upon failure */ - public static function encodeJsVar( $value ) { - if ( is_bool( $value ) ) { - $s = $value ? 'true' : 'false'; - } elseif ( is_null( $value ) ) { - $s = 'null'; - } elseif ( is_int( $value ) || is_float( $value ) ) { - $s = strval( $value ); - } elseif ( is_array( $value ) && // Make sure it's not associative. 
- array_keys( $value ) === range( 0, count( $value ) - 1 ) || - count( $value ) == 0 - ) { - $s = '['; - foreach ( $value as $elt ) { - if ( $s != '[' ) { - $s .= ','; - } - $s .= self::encodeJsVar( $elt ); - } - $s .= ']'; - } elseif ( $value instanceof XmlJsCode ) { - $s = $value->value; - } elseif ( is_object( $value ) || is_array( $value ) ) { - // Objects and associative arrays - $s = '{'; - foreach ( (array)$value as $name => $elt ) { - if ( $s != '{' ) { - $s .= ','; - } - - $s .= '"' . self::escapeJsString( $name ) . '":' . - self::encodeJsVar( $elt ); - } - $s .= '}'; - } else { - $s = '"' . self::escapeJsString( $value ) . '"'; + public static function encodeJsVar( $value, $pretty = false ) { + if ( $value instanceof XmlJsCode ) { + return $value->value; } - return $s; + return FormatJson::encode( $value, $pretty, FormatJson::UTF8_OK ); } /** * Create a call to a JavaScript function. The supplied arguments will be * encoded using Xml::encodeJsVar(). * + * @since 1.17 * @param string $name The name of the function to call, or a JavaScript expression * which evaluates to a function object which is called. - * @param array $args of arguments to pass to the function. - * - * @since 1.17 - * - * @return string + * @param array $args The arguments to pass to the function. + * @param bool $pretty If true, add non-significant whitespace to improve readability. + * @return string|bool: String if successful; false upon failure */ - public static function encodeJsCall( $name, $args ) { + public static function encodeJsCall( $name, $args, $pretty = false ) { foreach ( $args as &$arg ) { - $arg = Xml::encodeJsVar( $arg ); + $arg = Xml::encodeJsVar( $arg, $pretty ); + if ( $arg === false ) { + return false; + } } - return "$name(" . implode( ', ', $args ) . ");\n"; + return "$name(" . ( $pretty + ? ( ' ' . implode( ', ', $args ) . ' ' ) + : implode( ',', $args ) + ) . 
");"; } /** @@ -963,6 +938,11 @@ class XmlSelect { * Xml::encodeJsVar( new XmlJsCode( 'a + b' ) ); * * Returns "a + b". + * + * @note As of 1.21, XmlJsCode objects cannot be nested inside objects or arrays. The sole + * exception is the $args argument to Xml::encodeJsCall() because Xml::encodeJsVar() is + * called for each individual element in that array. + * * @since 1.17 */ class XmlJsCode { diff --git a/includes/api/ApiFormatJson.php b/includes/api/ApiFormatJson.php index abb63480afc..342a580f9a4 100644 --- a/includes/api/ApiFormatJson.php +++ b/includes/api/ApiFormatJson.php @@ -56,30 +56,32 @@ class ApiFormatJson extends ApiFormatBase { } public function execute() { - $prefix = $suffix = ''; - $params = $this->extractRequestParams(); + $json = FormatJson::encode( + $this->getResultData(), + $this->getIsHtml(), + $params['utf8'] ? FormatJson::ALL_OK : FormatJson::XMLMETA_OK + ); $callback = $params['callback']; - if ( !is_null( $callback ) ) { - $prefix = preg_replace( "/[^][.\\'\\\"_A-Za-z0-9]/", '', $callback ) . '('; - $suffix = ')'; + if ( $callback !== null ) { + $callback = preg_replace( "/[^][.\\'\\\"_A-Za-z0-9]/", '', $callback ); + $this->printText( "$callback($json)" ); + } else { + $this->printText( $json ); } - $this->printText( - $prefix . - FormatJson::encode( $this->getResultData(), $this->getIsHtml() ) . - $suffix - ); } public function getAllowedParams() { return array( - 'callback' => null, + 'callback' => null, + 'utf8' => false, ); } public function getParamDescription() { return array( 'callback' => 'If specified, wraps the output into a given function call. 
For safety, all user-specific data will be restricted.', + 'utf8' => 'If specified, encodes most (but not all) non-ASCII characters as UTF-8 instead of replacing them with hexadecimal escape sequences.', ); } diff --git a/includes/json/FormatJson.php b/includes/json/FormatJson.php index eececcba53f..013d58966da 100644 --- a/includes/json/FormatJson.php +++ b/includes/json/FormatJson.php @@ -1,6 +1,6 @@ ', and '&', which have special meanings in + * HTML and XML. + * + * @warning Do not use this option for JSON that could end up in inline scripts. + * - HTML5, §4.3.1.2 Restrictions for contents of script elements + * - XML 1.0 (5th Ed.), §2.4 Character Data and Markup + * + * @since 1.21 + */ + const XMLMETA_OK = 2; + + /** + * Skip escaping as many characters as reasonably possible. + * + * @warning When generating inline script blocks, use FormatJson::UTF8_OK instead. + * + * @since 1.21 + */ + const ALL_OK = 3; + + /** + * Characters problematic in JavaScript and their corresponding escape sequences. + * + * @note These are listed in ECMA-262 (5.1 Ed.), §7.3 Line Terminators along with U+000A (LF) + * and U+000D (CR). However, PHP already escapes LF and CR according to RFC 4627. + */ + private static $badChars = array( + "\xe2\x80\xa8" => '\u2028', // LINE SEPARATOR + "\xe2\x80\xa9" => '\u2029', // PARAGRAPH SEPARATOR + ); + + /** * Returns the JSON representation of a value. * - * @param $value Mixed: the value being encoded. Can be any type except a resource. - * @param $pretty Boolean: If true, adds non-significant whitespace to improve readability. + * @note Empty arrays are encoded as numeric arrays, not as objects, so cast any associative + * array that might be empty to an object before encoding it. * - * @return string + * @note In pre-1.21 versions of MediaWiki, using this function for generating inline script + * blocks may result in an XSS vulnerability, and quite likely will in XML documents + * (cf. FormatJson::XMLMETA_OK). 
Use Xml::encodeJsVar() instead in such cases. + * + * @param mixed $value The value to encode. Can be any type except a resource. + * @param bool $pretty If true, add non-significant whitespace to improve readability. + * @param int $escaping Bitfield consisting of _OK class constants + * @return string|bool: String if successful; false upon failure */ - public static function encode( $value, $pretty = false ) { - if ( !function_exists( 'json_encode' ) || ( $pretty && version_compare( PHP_VERSION, '5.4.0', '<' ) ) ) { - $json = new Services_JSON(); - return $json->encode( $value, $pretty ); - } else { - return json_encode( $value, $pretty ? JSON_PRETTY_PRINT : 0 ); + public static function encode( $value, $pretty = false, $escaping = 0 ) { + if ( version_compare( PHP_VERSION, '5.4.0', '<' ) ) { + return self::encode53( $value, $pretty, $escaping ); } + return self::encode54( $value, $pretty, $escaping ); } /** * Decodes a JSON string. * - * @param string $value the json string being decoded. - * @param $assoc Boolean: when true, returned objects will be converted into associative arrays. + * @param string $value The JSON string being decoded + * @param bool $assoc When true, returned objects will be converted into associative arrays. * - * @return Mixed: the value encoded in json in appropriate PHP type. - * Values true, false and null (case-insensitive) are returned as true, false - * and "&null;" respectively. "&null;" is returned if the json cannot be + * @return mixed: the value encoded in JSON in appropriate PHP type. + * Values `"true"`, `"false"`, and `"null"` (case-insensitive) are returned as `true`, `false` + * and `null` respectively. `null` is returned if the JSON cannot be * decoded or if the encoded data is deeper than the recursion limit. */ public static function decode( $value, $assoc = false ) { - if ( !function_exists( 'json_decode' ) ) { - $json = $assoc ? 
new Services_JSON( SERVICES_JSON_LOOSE_TYPE ) : - new Services_JSON(); - $jsonDec = $json->decode( $value ); - return $jsonDec; - } else { - return json_decode( $value, $assoc ); + return json_decode( $value, $assoc ); + } + + /** + * JSON encoder wrapper for PHP >= 5.4, which supports useful encoding options. + * + * @param mixed $value + * @param bool $pretty + * @param int $escaping + * @return string|bool + */ + private static function encode54( $value, $pretty, $escaping ) { + // PHP escapes '/' to prevent breaking out of inline script blocks using '', + // which is hardly useful when '<' and '>' are escaped, and such escaping negatively + // impacts the human readability of URLs and similar strings. + $options = JSON_UNESCAPED_SLASHES; + $options |= $pretty ? JSON_PRETTY_PRINT : 0; + $options |= ( $escaping & self::UTF8_OK ) ? JSON_UNESCAPED_UNICODE : 0; + $options |= ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP ); + $json = json_encode( $value, $options ); + if ( $json === false ) { + return false; } + return ( $escaping & self::UTF8_OK ) ? strtr( $json, self::$badChars ) : $json; } + /** + * JSON encoder wrapper for PHP 5.3, which lacks native support for some encoding options. + * Therefore, the missing options are implemented here purely in PHP code. + * + * @param mixed $value + * @param bool $pretty + * @param int $escaping + * @return string|bool + */ + private static function encode53( $value, $pretty, $escaping ) { + $options = ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP ); + $json = json_encode( $value, $options ); + if ( $json === false ) { + return false; + } + $json = str_replace( '\\/', '/', $json ); // emulate JSON_UNESCAPED_SLASHES + if ( $escaping & self::UTF8_OK ) { + // JSON hex escape sequences follow the format \uDDDD, where DDDD is four hex digits + // indicating the equivalent UTF-16 code unit's value. 
To most efficiently unescape + // them, we exploit the JSON extension's built-in decoder. + // * We escape the input a second time, so any such sequence becomes \\uDDDD. + // * To avoid interpreting escape sequences that were in the original input, + // each double-escaped backslash (\\\\) is replaced with \\\u005c. + // * We strip one of the backslashes from each of the escape sequences to unescape. + // * Then the JSON decoder can perform the actual unescaping. + $doubled = str_replace( "\\\\\\\\", "\\\\\\u005c", json_encode( $json ) ); + $json = json_decode( preg_replace( "/\\\\\\\\u(?!00[0-7])/", "\\\\u", $doubled ) ); + $json = strtr( $json, self::$badChars ); + } + return $pretty ? self::prettyPrint( $json ) : $json; + } + + /** + * Adds non-significant whitespace to an existing JSON representation of an object. + * Only needed for PHP < 5.4, which lacks the JSON_PRETTY_PRINT option. + * + * @param string $json + * @return string + */ + private static function prettyPrint( $json ) { + $buf = ''; + $indent = 0; + $json = str_replace( '\"', "\x01", $json ); + for ( $i = 0, $n = strlen( $json ); $i < $n; $i += $skip ) { + $skip = 1; + switch ( $json[$i] ) { + case ':': + $buf .= ': '; + break; + case '[': + case '{': + $indent++; // falls through + case ',': + $buf .= $json[$i] . "\n" . str_repeat( ' ', $indent ); + break; + case ']': + case '}': + $indent--; + $buf .= "\n" . str_repeat( ' ', $indent ) . 
$json[$i]; + break; + case '"': + $skip = strcspn( $json, '"', $i + 1 ) + 2; + $buf .= substr( $json, $i, $skip ); + break; + default: + $skip = strcspn( $json, ',]}"', $i + 1 ) + 1; + $buf .= substr( $json, $i, $skip ); + } + } + return str_replace( "\x01", '\"', preg_replace( '/ +$/m', '', $buf ) ); + } } diff --git a/includes/json/Services_JSON.php b/includes/json/Services_JSON.php deleted file mode 100644 index b7c101a1c02..00000000000 --- a/includes/json/Services_JSON.php +++ /dev/null @@ -1,882 +0,0 @@ - -* @author Matt Knapp -* @author Brett Stimmerman -* @copyright 2005 Michal Migurski -* @version CVS: $Id$ -* @license http://www.opensource.org/licenses/bsd-license.php -* @see http://pear.php.net/pepr/pepr-proposal-show.php?id=198 -*/ - -/** -* Marker constant for Services_JSON::decode(), used to flag stack state -*/ -define('SERVICES_JSON_SLICE', 1); - -/** -* Marker constant for Services_JSON::decode(), used to flag stack state -*/ -define('SERVICES_JSON_IN_STR', 2); - -/** -* Marker constant for Services_JSON::decode(), used to flag stack state -*/ -define('SERVICES_JSON_IN_ARR', 3); - -/** -* Marker constant for Services_JSON::decode(), used to flag stack state -*/ -define('SERVICES_JSON_IN_OBJ', 4); - -/** -* Marker constant for Services_JSON::decode(), used to flag stack state -*/ -define('SERVICES_JSON_IN_CMT', 5); - -/** -* Behavior switch for Services_JSON::decode() -*/ -define('SERVICES_JSON_LOOSE_TYPE', 16); - -/** -* Behavior switch for Services_JSON::decode() -*/ -define('SERVICES_JSON_SUPPRESS_ERRORS', 32); - -/** - * Converts to and from JSON format. 
- * - * Brief example of use: - * - * - * // create a new instance of Services_JSON - * $json = new Services_JSON(); - * - * // convert a complex value to JSON notation, and send it to the browser - * $value = array('foo', 'bar', array(1, 2, 'baz'), array(3, array(4))); - * $output = $json->encode($value); - * - * print($output); - * // prints: ["foo","bar",[1,2,"baz"],[3,[4]]] - * - * // accept incoming POST data, assumed to be in JSON notation - * $input = file_get_contents('php://input', 1000000); - * $value = $json->decode($input); - * - * - * @ingroup API - */ -class Services_JSON -{ - /** - * constructs a new JSON instance - * - * @param $use Integer: object behavior flags; combine with boolean-OR - * - * possible values: - * - SERVICES_JSON_LOOSE_TYPE: loose typing. - * "{...}" syntax creates associative arrays - * instead of objects in decode(). - * - SERVICES_JSON_SUPPRESS_ERRORS: error suppression. - * Values which can't be encoded (e.g. resources) - * appear as NULL instead of throwing errors. - * By default, a deeply-nested resource will - * bubble up with an error, so all return values - * from encode() should be checked with isError() - */ - function __construct($use = 0) - { - $this->use = $use; - } - - private static $mHavePear = null; - /** - * Returns cached result of class_exists('pear'), to avoid calling AutoLoader numerous times - * in cases when PEAR is not present. - * @return boolean - */ - private static function pearInstalled() { - if ( self::$mHavePear === null ) { - self::$mHavePear = class_exists( 'pear' ); - } - return self::$mHavePear; - } - - /** - * convert a string from one UTF-16 char to one UTF-8 char - * - * Normally should be handled by mb_convert_encoding, but - * provides a slower PHP-only method for installations - * that lack the multibyte string extension. 
- * - * @param string $utf16 UTF-16 character - * @return String: UTF-8 character - * @access private - */ - function utf162utf8($utf16) - { - // oh please oh please oh please oh please oh please - if(function_exists('mb_convert_encoding')) { - return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16'); - } - - $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]); - - switch(true) { - case ((0x7F & $bytes) == $bytes): - // this case should never be reached, because we are in ASCII range - // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - return chr(0x7F & $bytes); - - case (0x07FF & $bytes) == $bytes: - // return a 2-byte UTF-8 character - // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - return chr(0xC0 | (($bytes >> 6) & 0x1F)) - . chr(0x80 | ($bytes & 0x3F)); - - case (0xFC00 & $bytes) == 0xD800 && strlen($utf16) >= 4 && (0xFC & ord($utf16[2])) == 0xDC: - // return a 4-byte UTF-8 character - $char = ((($bytes & 0x03FF) << 10) - | ((ord($utf16[2]) & 0x03) << 8) - | ord($utf16[3])); - $char += 0x10000; - return chr(0xF0 | (($char >> 18) & 0x07)) - . chr(0x80 | (($char >> 12) & 0x3F)) - . chr(0x80 | (($char >> 6) & 0x3F)) - . chr(0x80 | ($char & 0x3F)); - - case (0xFFFF & $bytes) == $bytes: - // return a 3-byte UTF-8 character - // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - return chr(0xE0 | (($bytes >> 12) & 0x0F)) - . chr(0x80 | (($bytes >> 6) & 0x3F)) - . chr(0x80 | ($bytes & 0x3F)); - } - - // ignoring UTF-32 for now, sorry - return ''; - } - - /** - * convert a string from one UTF-8 char to one UTF-16 char - * - * Normally should be handled by mb_convert_encoding, but - * provides a slower PHP-only method for installations - * that lack the multibyte string extension. 
- * - * @param string $utf8 UTF-8 character - * @return String: UTF-16 character - * @access private - */ - function utf82utf16($utf8) - { - // oh please oh please oh please oh please oh please - if(function_exists('mb_convert_encoding')) { - return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8'); - } - - switch(strlen($utf8)) { - case 1: - // this case should never be reached, because we are in ASCII range - // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - return $utf8; - - case 2: - // return a UTF-16 character from a 2-byte UTF-8 char - // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - return chr(0x07 & (ord($utf8[0]) >> 2)) - . chr((0xC0 & (ord($utf8[0]) << 6)) - | (0x3F & ord($utf8[1]))); - - case 3: - // return a UTF-16 character from a 3-byte UTF-8 char - // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - return chr((0xF0 & (ord($utf8[0]) << 4)) - | (0x0F & (ord($utf8[1]) >> 2))) - . chr((0xC0 & (ord($utf8[1]) << 6)) - | (0x7F & ord($utf8[2]))); - - case 4: - // return a UTF-16 surrogate pair from a 4-byte UTF-8 char - if(ord($utf8[0]) > 0xF4) return ''; # invalid - $char = ((0x1C0000 & (ord($utf8[0]) << 18)) - | (0x03F000 & (ord($utf8[1]) << 12)) - | (0x000FC0 & (ord($utf8[2]) << 6)) - | (0x00003F & ord($utf8[3]))); - if($char > 0x10FFFF) return ''; # invalid - $char -= 0x10000; - return chr(0xD8 | (($char >> 18) & 0x03)) - . chr(($char >> 10) & 0xFF) - . chr(0xDC | (($char >> 8) & 0x03)) - . chr($char & 0xFF); - } - - // ignoring UTF-32 for now, sorry - return ''; - } - - /** - * encodes an arbitrary variable into JSON format - * - * @param $var Mixed: any number, boolean, string, array, or object to be encoded. - * see argument 1 to Services_JSON() above for array-parsing behavior. - * if var is a string, note that encode() always expects it - * to be in ASCII or UTF-8 format! 
- * @param $pretty Boolean: pretty-print output with indents and newlines - * - * @return mixed JSON string representation of input var or an error if a problem occurs - * @access public - */ - function encode($var, $pretty=false) - { - $this->indent = 0; - $this->pretty = $pretty; - $this->nameValSeparator = $pretty ? ': ' : ':'; - return $this->encode2($var); - } - - /** - * encodes an arbitrary variable into JSON format - * - * @param $var Mixed: any number, boolean, string, array, or object to be encoded. - * see argument 1 to Services_JSON() above for array-parsing behavior. - * if var is a string, note that encode() always expects it - * to be in ASCII or UTF-8 format! - * - * @return mixed JSON string representation of input var or an error if a problem occurs - * @access private - */ - function encode2($var) - { - if ($this->pretty) { - $close = "\n" . str_repeat("\t", $this->indent); - $open = $close . "\t"; - $mid = ',' . $open; - } - else { - $open = $close = ''; - $mid = ','; - } - - switch (gettype($var)) { - case 'boolean': - return $var ? 
'true' : 'false'; - - case 'NULL': - return 'null'; - - case 'integer': - return (int) $var; - - case 'double': - case 'float': - return (float) $var; - - case 'string': - // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT - $ascii = ''; - $strlen_var = strlen($var); - - /* - * Iterate over every character in the string, - * escaping with a slash or encoding to UTF-8 where necessary - */ - for ($c = 0; $c < $strlen_var; ++$c) { - - $ord_var_c = ord($var[$c]); - - switch (true) { - case $ord_var_c == 0x08: - $ascii .= '\b'; - break; - case $ord_var_c == 0x09: - $ascii .= '\t'; - break; - case $ord_var_c == 0x0A: - $ascii .= '\n'; - break; - case $ord_var_c == 0x0C: - $ascii .= '\f'; - break; - case $ord_var_c == 0x0D: - $ascii .= '\r'; - break; - - case $ord_var_c == 0x22: - case $ord_var_c == 0x2F: - case $ord_var_c == 0x5C: - // double quote, slash, slosh - $ascii .= '\\'.$var[$c]; - break; - - case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)): - // characters U-00000000 - U-0000007F (same as ASCII) - $ascii .= $var[$c]; - break; - - case (($ord_var_c & 0xE0) == 0xC0): - // characters U-00000080 - U-000007FF, mask 110XXXXX - // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - $char = pack('C*', $ord_var_c, ord($var[$c + 1])); - $c += 1; - $utf16 = $this->utf82utf16($char); - $ascii .= sprintf('\u%04s', bin2hex($utf16)); - break; - - case (($ord_var_c & 0xF0) == 0xE0): - // characters U-00000800 - U-0000FFFF, mask 1110XXXX - // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - $char = pack('C*', $ord_var_c, - ord($var[$c + 1]), - ord($var[$c + 2])); - $c += 2; - $utf16 = $this->utf82utf16($char); - $ascii .= sprintf('\u%04s', bin2hex($utf16)); - break; - - case (($ord_var_c & 0xF8) == 0xF0): - // characters U-00010000 - U-001FFFFF, mask 11110XXX - // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - // These will always return a surrogate pair - $char = pack('C*', $ord_var_c, - ord($var[$c + 1]), - ord($var[$c + 2]), - ord($var[$c + 3])); - 
$c += 3; - $utf16 = $this->utf82utf16($char); - if($utf16 == '') { - $ascii .= '\ufffd'; - } else { - $utf16 = str_split($utf16, 2); - $ascii .= sprintf('\u%04s\u%04s', bin2hex($utf16[0]), bin2hex($utf16[1])); - } - break; - } - } - - return '"'.$ascii.'"'; - - case 'array': - /* - * As per JSON spec if any array key is not an integer - * we must treat the the whole array as an object. We - * also try to catch a sparsely populated associative - * array with numeric keys here because some JS engines - * will create an array with empty indexes up to - * max_index which can cause memory issues and because - * the keys, which may be relevant, will be remapped - * otherwise. - * - * As per the ECMA and JSON specification an object may - * have any string as a property. Unfortunately due to - * a hole in the ECMA specification if the key is a - * ECMA reserved word or starts with a digit the - * parameter is only accessible using ECMAScript's - * bracket notation. - */ - - // treat as a JSON object - if (is_array($var) && count($var) && (array_keys($var) !== range(0, count($var) - 1))) { - $this->indent++; - $properties = array_map(array($this, 'name_value'), - array_keys($var), - array_values($var)); - $this->indent--; - - foreach($properties as $property) { - if($this->isError($property)) { - return $property; - } - } - - return '{' . $open . join($mid, $properties) . $close . '}'; - } - - // treat it like a regular array - $this->indent++; - $elements = array_map(array($this, 'encode2'), $var); - $this->indent--; - - foreach($elements as $element) { - if($this->isError($element)) { - return $element; - } - } - - return '[' . $open . join($mid, $elements) . $close . 
']'; - - case 'object': - $vars = get_object_vars($var); - - $this->indent++; - $properties = array_map(array($this, 'name_value'), - array_keys($vars), - array_values($vars)); - $this->indent--; - - foreach($properties as $property) { - if($this->isError($property)) { - return $property; - } - } - - return '{' . $open . join($mid, $properties) . $close . '}'; - - default: - return ($this->use & SERVICES_JSON_SUPPRESS_ERRORS) - ? 'null' - : new Services_JSON_Error(gettype($var)." can not be encoded as JSON string"); - } - } - - /** - * array-walking function for use in generating JSON-formatted name-value pairs - * - * @param string $name name of key to use - * @param $value Mixed: reference to an array element to be encoded - * - * @return String: JSON-formatted name-value pair, like '"name":value' - * @access private - */ - function name_value($name, $value) - { - $encoded_value = $this->encode2($value); - - if($this->isError($encoded_value)) { - return $encoded_value; - } - - return $this->encode2(strval($name)) . $this->nameValSeparator . $encoded_value; - } - - /** - * reduce a string by removing leading and trailing comments and whitespace - * - * @param string $str string value to strip of comments and whitespace - * - * @return String: string value stripped of comments and whitespace - * @access private - */ - function reduce_string($str) - { - $str = preg_replace(array( - - // eliminate single line comments in '// ...' form - '#^\s*//(.+)$#m', - - // eliminate multi-line comments in '/* ... */' form, at start of string - '#^\s*/\*(.+)\*/#Us', - - // eliminate multi-line comments in '/* ... */' form, at end of string - '#/\*(.+)\*/\s*$#Us' - - ), '', $str); - - // eliminate extraneous space - return trim($str); - } - - /** - * decodes a JSON string into appropriate variable - * - * @param string $str JSON-formatted string - * - * @return mixed number, boolean, string, array, or object - * corresponding to given JSON input string. 
- * See argument 1 to Services_JSON() above for object-output behavior. - * Note that decode() always returns strings - * in ASCII or UTF-8 format! - * @access public - */ - function decode($str) - { - $str = $this->reduce_string($str); - - switch (strtolower($str)) { - case 'true': - return true; - - case 'false': - return false; - - case 'null': - return null; - - default: - $m = array(); - - if (is_numeric($str)) { - // Lookie-loo, it's a number - - // This would work on its own, but I'm trying to be - // good about returning integers where appropriate: - // return (float)$str; - - // Return float or int, as appropriate - return ((float)$str == (integer)$str) - ? (integer)$str - : (float)$str; - - } elseif (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) { - // STRINGS RETURNED IN UTF-8 FORMAT - $delim = substr($str, 0, 1); - $chrs = substr($str, 1, -1); - $utf8 = ''; - $strlen_chrs = strlen($chrs); - - for ($c = 0; $c < $strlen_chrs; ++$c) { - - $substr_chrs_c_2 = substr($chrs, $c, 2); - $ord_chrs_c = ord($chrs[$c]); - - switch (true) { - case $substr_chrs_c_2 == '\b': - $utf8 .= chr(0x08); - ++$c; - break; - case $substr_chrs_c_2 == '\t': - $utf8 .= chr(0x09); - ++$c; - break; - case $substr_chrs_c_2 == '\n': - $utf8 .= chr(0x0A); - ++$c; - break; - case $substr_chrs_c_2 == '\f': - $utf8 .= chr(0x0C); - ++$c; - break; - case $substr_chrs_c_2 == '\r': - $utf8 .= chr(0x0D); - ++$c; - break; - - case $substr_chrs_c_2 == '\\"': - case $substr_chrs_c_2 == '\\\'': - case $substr_chrs_c_2 == '\\\\': - case $substr_chrs_c_2 == '\\/': - if (($delim == '"' && $substr_chrs_c_2 != '\\\'') || - ($delim == "'" && $substr_chrs_c_2 != '\\"')) { - $utf8 .= $chrs[++$c]; - } - break; - - case preg_match('/\\\uD[89AB][0-9A-F]{2}\\\uD[C-F][0-9A-F]{2}/i', substr($chrs, $c, 12)): - // escaped unicode surrogate pair - $utf16 = chr(hexdec(substr($chrs, ($c + 2), 2))) - . chr(hexdec(substr($chrs, ($c + 4), 2))) - . chr(hexdec(substr($chrs, ($c + 8), 2))) - . 
chr(hexdec(substr($chrs, ($c + 10), 2))); - $utf8 .= $this->utf162utf8($utf16); - $c += 11; - break; - - case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)): - // single, escaped unicode character - $utf16 = chr(hexdec(substr($chrs, ($c + 2), 2))) - . chr(hexdec(substr($chrs, ($c + 4), 2))); - $utf8 .= $this->utf162utf8($utf16); - $c += 5; - break; - - case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F): - $utf8 .= $chrs[$c]; - break; - - case ($ord_chrs_c & 0xE0) == 0xC0: - // characters U-00000080 - U-000007FF, mask 110XXXXX - //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - $utf8 .= substr($chrs, $c, 2); - ++$c; - break; - - case ($ord_chrs_c & 0xF0) == 0xE0: - // characters U-00000800 - U-0000FFFF, mask 1110XXXX - // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - $utf8 .= substr($chrs, $c, 3); - $c += 2; - break; - - case ($ord_chrs_c & 0xF8) == 0xF0: - // characters U-00010000 - U-001FFFFF, mask 11110XXX - // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - $utf8 .= substr($chrs, $c, 4); - $c += 3; - break; - - case ($ord_chrs_c & 0xFC) == 0xF8: - // characters U-00200000 - U-03FFFFFF, mask 111110XX - // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - $utf8 .= substr($chrs, $c, 5); - $c += 4; - break; - - case ($ord_chrs_c & 0xFE) == 0xFC: - // characters U-04000000 - U-7FFFFFFF, mask 1111110X - // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 - $utf8 .= substr($chrs, $c, 6); - $c += 5; - break; - - } - - } - - return $utf8; - - } elseif (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) { - // array, or object notation - - if ($str[0] == '[') { - $stk = array(SERVICES_JSON_IN_ARR); - $arr = array(); - } else { - if ($this->use & SERVICES_JSON_LOOSE_TYPE) { - $stk = array(SERVICES_JSON_IN_OBJ); - $obj = array(); - } else { - $stk = array(SERVICES_JSON_IN_OBJ); - $obj = new stdClass(); - } - } - - array_push($stk, array( 'what' => SERVICES_JSON_SLICE, - 'where' => 0, - 'delim' => false)); - - 
$chrs = substr($str, 1, -1); - $chrs = $this->reduce_string($chrs); - - if ($chrs == '') { - if (reset($stk) == SERVICES_JSON_IN_ARR) { - return $arr; - - } else { - return $obj; - - } - } - - //print("\nparsing {$chrs}\n"); - - $strlen_chrs = strlen($chrs); - - for ($c = 0; $c <= $strlen_chrs; ++$c) { - - $top = end($stk); - $substr_chrs_c_2 = substr($chrs, $c, 2); - - if (($c == $strlen_chrs) || (($chrs[$c] == ',') && ($top['what'] == SERVICES_JSON_SLICE))) { - // found a comma that is not inside a string, array, etc., - // OR we've reached the end of the character list - $slice = substr($chrs, $top['where'], ($c - $top['where'])); - array_push($stk, array('what' => SERVICES_JSON_SLICE, 'where' => ($c + 1), 'delim' => false)); - //print("Found split at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n"); - - if (reset($stk) == SERVICES_JSON_IN_ARR) { - // we are in an array, so just push an element onto the stack - array_push($arr, $this->decode($slice)); - - } elseif (reset($stk) == SERVICES_JSON_IN_OBJ) { - // we are in an object, so figure - // out the property name and set an - // element in an associative array, - // for now - $parts = array(); - - if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) { - // "name":value pair - $key = $this->decode($parts[1]); - $val = $this->decode($parts[2]); - - if ($this->use & SERVICES_JSON_LOOSE_TYPE) { - $obj[$key] = $val; - } else { - $obj->$key = $val; - } - } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) { - // name:value pair, where name is unquoted - $key = $parts[1]; - $val = $this->decode($parts[2]); - - if ($this->use & SERVICES_JSON_LOOSE_TYPE) { - $obj[$key] = $val; - } else { - $obj->$key = $val; - } - } - - } - - } elseif ((($chrs[$c] == '"') || ($chrs[$c] == "'")) && ($top['what'] != SERVICES_JSON_IN_STR)) { - // found a quote, and we are not inside a string - array_push($stk, array('what' => SERVICES_JSON_IN_STR, 'where' => $c, 'delim' 
=> $chrs[$c])); - //print("Found start of string at {$c}\n"); - - } elseif (($chrs[$c] == $top['delim']) && - ($top['what'] == SERVICES_JSON_IN_STR) && - (($chrs[$c - 1] != '\\') || - ($chrs[$c - 1] == '\\' && $chrs[$c - 2] == '\\'))) { - // found a quote, we're in a string, and it's not escaped - array_pop($stk); - //print("Found end of string at {$c}: ".substr($chrs, $top['where'], (1 + 1 + $c - $top['where']))."\n"); - - } elseif (($chrs[$c] == '[') && - in_array($top['what'], array(SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ))) { - // found a left-bracket, and we are in an array, object, or slice - array_push($stk, array('what' => SERVICES_JSON_IN_ARR, 'where' => $c, 'delim' => false)); - //print("Found start of array at {$c}\n"); - - } elseif (($chrs[$c] == ']') && ($top['what'] == SERVICES_JSON_IN_ARR)) { - // found a right-bracket, and we're in an array - array_pop($stk); - //print("Found end of array at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n"); - - } elseif (($chrs[$c] == '{') && - in_array($top['what'], array(SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ))) { - // found a left-brace, and we are in an array, object, or slice - array_push($stk, array('what' => SERVICES_JSON_IN_OBJ, 'where' => $c, 'delim' => false)); - //print("Found start of object at {$c}\n"); - - } elseif (($chrs[$c] == '}') && ($top['what'] == SERVICES_JSON_IN_OBJ)) { - // found a right-brace, and we're in an object - array_pop($stk); - //print("Found end of object at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n"); - - } elseif (($substr_chrs_c_2 == '/*') && - in_array($top['what'], array(SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ))) { - // found a comment start, and we are in an array, object, or slice - array_push($stk, array('what' => SERVICES_JSON_IN_CMT, 'where' => $c, 'delim' => false)); - $c++; - //print("Found start of comment at {$c}\n"); - - } elseif (($substr_chrs_c_2 == 
'*/') && ($top['what'] == SERVICES_JSON_IN_CMT)) { - // found a comment end, and we're in one now - array_pop($stk); - $c++; - - for ($i = $top['where']; $i <= $c; ++$i) - $chrs = substr_replace($chrs, ' ', $i, 1); - - //print("Found end of comment at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n"); - - } - - } - - if (reset($stk) == SERVICES_JSON_IN_ARR) { - return $arr; - - } elseif (reset($stk) == SERVICES_JSON_IN_OBJ) { - return $obj; - - } - - } - } - } - - /** - * @todo Ultimately, this should just call PEAR::isError() - * @return bool - */ - function isError($data, $code = null) - { - if ( self::pearInstalled() ) { - //avoid some strict warnings on PEAR isError check (looks like http://pear.php.net/bugs/bug.php?id=9950 has been around for some time) - return @PEAR::isError($data, $code); - } elseif (is_object($data) && (get_class($data) == 'services_json_error' || - is_subclass_of($data, 'services_json_error'))) { - return true; - } - - return false; - } -} - - -// Hide the PEAR_Error variant from Doxygen -/// @cond -if (class_exists('PEAR_Error')) { - - /** - * @ingroup API - */ - class Services_JSON_Error extends PEAR_Error - { - function Services_JSON_Error($message = 'unknown error', $code = null, - $mode = null, $options = null, $userinfo = null) - { - parent::PEAR_Error($message, $code, $mode, $options, $userinfo); - } - } - -} else { -/// @endcond - - /** - * @todo Ultimately, this class shall be descended from PEAR_Error - * @ingroup API - */ - class Services_JSON_Error - { - function Services_JSON_Error($message = 'unknown error', $code = null, - $mode = null, $options = null, $userinfo = null) - { - $this->message = $message; - } - - function __toString() - { - return $this->message; - } - } -} diff --git a/includes/resourceloader/ResourceLoader.php b/includes/resourceloader/ResourceLoader.php index 27f682c2f72..62c08223636 100644 --- a/includes/resourceloader/ResourceLoader.php +++ b/includes/resourceloader/ResourceLoader.php 
@@ -863,7 +863,9 @@ class ResourceLoader { // output javascript "[]" instead of "{}". This fixes that. (object)$styles, (object)$messages - ) ); + ), + ResourceLoader::inDebugMode() + ); } /** @@ -1039,7 +1041,7 @@ class ResourceLoader { * @return string */ public static function makeConfigSetScript( array $configuration ) { - return Xml::encodeJsCall( 'mw.config.set', array( $configuration ) ); + return Xml::encodeJsCall( 'mw.config.set', array( $configuration ), ResourceLoader::inDebugMode() ); } /** diff --git a/includes/resourceloader/ResourceLoaderUserOptionsModule.php b/includes/resourceloader/ResourceLoaderUserOptionsModule.php index 4624cbcefa8..0b7e1964770 100644 --- a/includes/resourceloader/ResourceLoaderUserOptionsModule.php +++ b/includes/resourceloader/ResourceLoaderUserOptionsModule.php @@ -56,7 +56,9 @@ class ResourceLoaderUserOptionsModule extends ResourceLoaderModule { public function getScript( ResourceLoaderContext $context ) { global $wgUser; return Xml::encodeJsCall( 'mw.user.options.set', - array( $wgUser->getOptions() ) ); + array( $wgUser->getOptions() ), + ResourceLoader::inDebugMode() + ); } /** diff --git a/includes/resourceloader/ResourceLoaderUserTokensModule.php b/includes/resourceloader/ResourceLoaderUserTokensModule.php index 6d787c503ac..f3090dd3fd7 100644 --- a/includes/resourceloader/ResourceLoaderUserTokensModule.php +++ b/includes/resourceloader/ResourceLoaderUserTokensModule.php @@ -54,7 +54,9 @@ class ResourceLoaderUserTokensModule extends ResourceLoaderModule { */ public function getScript( ResourceLoaderContext $context ) { return Xml::encodeJsCall( 'mw.user.tokens.set', - array( $this->contextUserTokens( $context ) ) ); + array( $this->contextUserTokens( $context ) ), + ResourceLoader::inDebugMode() + ); } /** diff --git a/tests/phpunit/includes/JsonTest.php b/tests/phpunit/includes/JsonTest.php deleted file mode 100644 index 96a2ead528a..00000000000 --- a/tests/phpunit/includes/JsonTest.php +++ /dev/null @@ -1,27 +0,0 @@ 
-assertNotEquals( - '\ud840\udc00', - strtolower( FormatJson::encode( "\xf0\xa0\x80\x80" ) ), - 'Test encoding an broken json_encode character (U+20000)' - ); - - } - - function testDecodeVarTypes() { - $this->assertInternalType( - 'object', - FormatJson::decode( '{"Name": "Cheeso", "Rank": 7}' ), - 'Default to object' - ); - - $this->assertInternalType( - 'array', - FormatJson::decode( '{"Name": "Cheeso", "Rank": 7}', true ), - 'Optional array' - ); - } -} diff --git a/tests/phpunit/includes/json/FormatJsonTest.php b/tests/phpunit/includes/json/FormatJsonTest.php new file mode 100644 index 00000000000..9e25e18fa22 --- /dev/null +++ b/tests/phpunit/includes/json/FormatJsonTest.php @@ -0,0 +1,161 @@ + new stdClass, + 'emptyArray' => array(), + 'string' => 'foobar', + 'filledArray' => array( + array( + 123, + 456, + ), + '"7":["8",{"9":"10"}]', + ), + ); + + // 4 space indent, no trailing whitespace, no trailing linefeed + $json = '{ + "emptyObject": { + + }, + "emptyArray": [ + + ], + "string": "foobar", + "filledArray": [ + [ + 123, + 456 + ], + "\"7\":[\"8\",{\"9\":\"10\"}]" + ] +}'; + + $json = str_replace( "\r", '', $json ); // Windows compat + $this->assertSame( $json, FormatJson::encode( $obj, true ) ); + } + + public static function provideEncodeDefault() { + return self::getEncodeTestCases( array() ); + } + + /** + * @dataProvider provideEncodeDefault + */ + public function testEncodeDefault( $from, $to ) { + $this->assertSame( $to, FormatJson::encode( $from ) ); + } + + public static function provideEncodeUtf8() { + return self::getEncodeTestCases( array( 'unicode' ) ); + } + + /** + * @dataProvider provideEncodeUtf8 + */ + public function testEncodeUtf8( $from, $to ) { + $this->assertSame( $to, FormatJson::encode( $from, false, FormatJson::UTF8_OK ) ); + } + + public static function provideEncodeXmlMeta() { + return self::getEncodeTestCases( array( 'xmlmeta' ) ); + } + + /** + * @dataProvider provideEncodeXmlMeta + */ + public function testEncodeXmlMeta( 
$from, $to ) { + $this->assertSame( $to, FormatJson::encode( $from, false, FormatJson::XMLMETA_OK ) ); + } + + public static function provideEncodeAllOk() { + return self::getEncodeTestCases( array( 'unicode', 'xmlmeta' ) ); + } + + /** + * @dataProvider provideEncodeAllOk + */ + public function testEncodeAllOk( $from, $to ) { + $this->assertSame( $to, FormatJson::encode( $from, false, FormatJson::ALL_OK ) ); + } + + public function testEncodePhpBug46944() { + $this->assertNotEquals( + '\ud840\udc00', + strtolower( FormatJson::encode( "\xf0\xa0\x80\x80" ) ), + 'Test encoding an broken json_encode character (U+20000)' + ); + + } + + public function testDecodeReturnType() { + $this->assertInternalType( + 'object', + FormatJson::decode( '{"Name": "Cheeso", "Rank": 7}' ), + 'Default to object' + ); + + $this->assertInternalType( + 'array', + FormatJson::decode( '{"Name": "Cheeso", "Rank": 7}', true ), + 'Optional array' + ); + } + + /** + * Generate a set of test cases for a particular combination of encoder options. 
+ * + * @param array $unescapedGroups List of character groups to leave unescaped + * @return array: Arrays of unencoded strings and corresponding encoded strings + */ + private static function getEncodeTestCases( array $unescapedGroups ) { + $groups = array( + 'always' => array( + // Forward slash (always unescaped) + '/' => '/', + + // Control characters + "\0" => '\u0000', + "\x08" => '\b', + "\t" => '\t', + "\n" => '\n', + "\r" => '\r', + "\f" => '\f', + "\x1f" => '\u001f', // representative example + + // Double quotes + '"' => '\"', + + // Backslashes + '\\' => '\\\\', + '\\\\' => '\\\\\\\\', + '\\u00e9' => '\\\u00e9', // security check for Unicode unescaping + + // Line terminators + "\xe2\x80\xa8" => '\u2028', + "\xe2\x80\xa9" => '\u2029', + ), + 'unicode' => array( + "\xc3\xa9" => '\u00e9', + "\xf0\x9d\x92\x9e" => '\ud835\udc9e', // U+1D49E, outside the BMP + ), + 'xmlmeta' => array( + '<' => '\u003C', // JSON_HEX_TAG uses uppercase hex digits + '>' => '\u003E', + '&' => '\u0026', + ), + ); + + $cases = array(); + foreach ( $groups as $name => $rules ) { + $leaveUnescaped = in_array( $name, $unescapedGroups ); + foreach ( $rules as $from => $to ) { + $cases[] = array( $from, '"' . ( $leaveUnescaped ? $from : $to ) . 
'"' ); + } + } + return $cases; + } +} diff --git a/tests/phpunit/includes/json/ServicesJsonTest.php b/tests/phpunit/includes/json/ServicesJsonTest.php deleted file mode 100644 index 50518303816..00000000000 --- a/tests/phpunit/includes/json/ServicesJsonTest.php +++ /dev/null @@ -1,93 +0,0 @@ -encode() - * produce the same output - * - * @dataProvider provideValuesToEncode - */ - public function testJsonEncode( $input, $desc ) { - if ( !function_exists( 'json_encode' ) ) { - $this->markTestIncomplete( 'No PHP json support, unable to test' ); - return; - } elseif ( strtolower( json_encode( "\xf0\xa0\x80\x80" ) ) != '"\ud840\udc00"' ) { - $this->markTestIncomplete( 'Have buggy PHP json support, unable to test' ); - return; - } else { - $jsonObj = new Services_JSON(); - $this->assertEquals( - $jsonObj->encode( $input ), - json_encode( $input ), - $desc - ); - } - } - - /** - * Test to make sure core json_decode() and our Services_Json()->decode() - * produce the same output - * - * @dataProvider provideValuesToDecode - */ - public function testJsonDecode( $input, $desc ) { - if ( !function_exists( 'json_decode' ) ) { - $this->markTestIncomplete( 'No PHP json support, unable to test' ); - return; - } else { - $jsonObj = new Services_JSON(); - $this->assertEquals( - $jsonObj->decode( $input ), - json_decode( $input ), - $desc - ); - } - } - - public static function provideValuesToEncode() { - $obj = new stdClass(); - $obj->property = 'value'; - $obj->property2 = null; - $obj->property3 = 1.234; - return array( - array( 1, 'basic integer' ), - array( -1, 'negative integer' ), - array( 1.1, 'basic float' ), - array( true, 'basic bool true' ), - array( false, 'basic bool false' ), - array( 'some string', 'basic string test' ), - array( "some string\nwith newline", 'newline string test' ), - array( '♥ü', 'unicode string test' ), - array( array( 'some', 'string', 'values' ), 'basic array of strings' ), - array( array( 'key1' => 'val1', 'key2' => 'val2' ), 'array with string 
keys' ), - array( array( 1 => 'val1', 3 => 'val2', '2' => 'val3' ), 'out of order numbered array test' ), - array( array(), 'empty array test' ), - array( $obj, 'basic object test' ), - array( new stdClass, 'empty object test' ), - array( null, 'null test' ), - ); - } - - public static function provideValuesToDecode() { - return array( - array( '1', 'basic integer' ), - array( '-1', 'negative integer' ), - array( '1.1', 'basic float' ), - array( '1.1e1', 'scientific float' ), - array( 'true', 'basic bool true' ), - array( 'false', 'basic bool false' ), - array( '"some string"', 'basic string test' ), - array( '"some string\nwith newline"', 'newline string test' ), - array( '"♥ü"', 'unicode character string test' ), - array( '"\u2665"', 'unicode \\u string test' ), - array( '["some","string","values"]', 'basic array of strings' ), - array( '[]', 'empty array test' ), - array( '{"key":"value"}', 'Basic key => value test' ), - array( '{}', 'empty object test' ), - array( 'null', 'null test' ), - ); - } -} diff --git a/tests/qunit/data/load.mock.php b/tests/qunit/data/load.mock.php index 7ff392ab4a3..f6eff77a5b1 100644 --- a/tests/qunit/data/load.mock.php +++ b/tests/qunit/data/load.mock.php @@ -24,6 +24,7 @@ */ header( 'Content-Type: text/javascript; charset=utf-8' ); +require_once __DIR__ . '/../../../includes/json/FormatJson.php'; require_once __DIR__ . '/../../../includes/Xml.php'; $moduleImplementations = array( @@ -50,7 +51,7 @@ if ( isset( $_GET['modules'] ) ) { if ( isset( $moduleImplementations[$module] ) ) { $response .= $moduleImplementations[$module]; } else { - $response .= Xml::encodeJsCall( 'mw.loader.state', array( $module, 'missing' ) ); + $response .= Xml::encodeJsCall( 'mw.loader.state', array( $module, 'missing' ), true ); } } } -- 2.11.4.GIT