FormatJson: microoptimizations for UTF8_OK mode
author Kevin Israel <pleasestand@live.com>
Fri, 5 Apr 2013 19:15:02 +0000 (15:15 -0400)
committer Gerrit Code Review <gerrit@wikimedia.org>
Mon, 8 Apr 2013 05:07:07 +0000 (05:07 +0000)
* Replace strtr with str_replace where faster.
* Use addcslashes to escape json_encode's output. Because no control
  characters are included, the only characters that have to be
  escaped are \ and ". (irrelevant for PHP 5.4+ installations)

Re-encoding a ~1.5 MB API response from the Chinese Wikipedia:
* PHP 5.3: 32% faster (from 347 ms to 239 ms)
* PHP 5.4: 70% faster (from 51 ms to 15 ms)
* HHVM: 42% faster (from 326 ms to 191 ms)

Change-Id: I7c9342682986d40a2f2436ac978390b6018a3521

includes/json/FormatJson.php

index bdf98d5..f17a1a1 100644 (file)
@@ -56,14 +56,22 @@ class FormatJson {
        const ALL_OK = 3;
 
        /**
-        * Characters problematic in JavaScript and their corresponding escape sequences.
+        * Characters problematic in JavaScript.
         *
         * @note These are listed in ECMA-262 (5.1 Ed.), §7.3 Line Terminators along with U+000A (LF)
         *       and U+000D (CR). However, PHP already escapes LF and CR according to RFC 4627.
         */
        private static $badChars = array(
-               "\xe2\x80\xa8" => '\u2028', // LINE SEPARATOR
-               "\xe2\x80\xa9" => '\u2029', // PARAGRAPH SEPARATOR
+               "\xe2\x80\xa8", // U+2028 LINE SEPARATOR
+               "\xe2\x80\xa9", // U+2029 PARAGRAPH SEPARATOR
+       );
+
+       /**
+        * Escape sequences for characters listed in FormatJson::$badChars.
+        */
+       private static $badCharsEscaped = array(
+               '\u2028', // U+2028 LINE SEPARATOR
+               '\u2029', // U+2029 PARAGRAPH SEPARATOR
        );
 
        /**
@@ -123,7 +131,10 @@ class FormatJson {
                if ( $json === false ) {
                        return false;
                }
-               return ( $escaping & self::UTF8_OK ) ? strtr( $json, self::$badChars ) : $json;
+               if ( $escaping & self::UTF8_OK ) {
+                       $json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
+               }
+               return $json;
        }
 
        /**
@@ -151,9 +162,9 @@ class FormatJson {
                        //   each double-escaped backslash (\\\\) is replaced with \\\u005c.
                        // * We strip one of the backslashes from each of the escape sequences to unescape.
                        // * Then the JSON decoder can perform the actual unescaping.
-                       $doubled = str_replace( "\\\\\\\\", "\\\\\\u005c", json_encode( $json ) );
-                       $json = json_decode( preg_replace( "/\\\\\\\\u(?!00[0-7])/", "\\\\u", $doubled ) );
-                       $json = strtr( $json, self::$badChars );
+                       $json = str_replace( "\\\\\\\\", "\\\\\\u005c", addcslashes( $json, '\"' ) );
+                       $json = json_decode( preg_replace( "/\\\\\\\\u(?!00[0-7])/", "\\\\u", "\"$json\"" ) );
+                       $json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
                }
                return $pretty ? self::prettyPrint( $json ) : $json;
        }