FormatJson::stripComments
authorBryan Davis <bd808@wikimedia.org>
Sun, 12 Oct 2014 17:59:00 +0000 (11:59 -0600)
committerBryan Davis <bd808@wikimedia.org>
Sun, 12 Oct 2014 18:34:22 +0000 (12:34 -0600)
Add stripComments method that can be used to remove single line and
multiline comments from an otherwise valid JSON string. Inspired by the
comment removal code in redisJobRunnerService and discussions on irc
about the Extension registration RFC.

Change-Id: Ie743957bfbb7b1fca8cb78ad48c1efd953362fde

includes/json/FormatJson.php
tests/phpunit/includes/json/FormatJsonTest.php

index f3e5c76..74775b5 100644 (file)
@@ -70,6 +70,13 @@ class FormatJson {
         */
        const TRY_FIXING = 0x200;
 
+       /**
+        * If set, strip comments from input before parsing as JSON.
+        *
+        * @since 1.25
+        */
+       const STRIP_COMMENTS = 0x400;
+
        /**
         * Regex that matches whitespace inside empty arrays and objects.
         *
@@ -150,10 +157,14 @@ class FormatJson {
         * Unlike FormatJson::decode(), if $value represents null value, it will be properly decoded as valid.
         *
         * @param string $value The JSON string being decoded
-        * @param int $options A bit field that allows FORCE_ASSOC, TRY_FIXING
+        * @param int $options A bit field that allows FORCE_ASSOC, TRY_FIXING,
+        * STRIP_COMMENTS
         * @return Status If valid JSON, the value is available in $result->getValue()
         */
        public static function parse( $value, $options = 0 ) {
+               if ( $options & self::STRIP_COMMENTS ) {
+                       $value = self::stripComments( $value );
+               }
                $assoc = ( $options & self::FORCE_ASSOC ) !== 0;
                $result = json_decode( $value, $assoc );
                $code = json_last_error();
@@ -347,4 +358,79 @@ class FormatJson {
 
                return str_replace( "\x01", '\"', $buf );
        }
+
+       /**
+        * Remove multiline and single line comments from an otherwise valid JSON
+        * input string. This can be used as a preprocessor to allow JSON
+        * formatted configuration files to contain comments.
+        *
+        * Comment markers that appear inside a JSON string are left untouched.
+        * A single line comment is removed together with the newline that ends
+        * it, and a comment that is still open at the end of the input is
+        * removed up to the end of the input.
+        *
+        * @param string $json
+        * @return string JSON with comments removed
+        */
+       public static function stripComments( $json ) {
+               // Ensure we have a string
+               $str = (string) $json;
+               $buffer = '';
+               $maxLen = strlen( $str );
+               // Offset of the first byte that has not been copied to $buffer yet
+               $mark = 0;
+               // Offset of the "/" that opened the comment currently being skipped
+               $commentStart = 0;
+
+               $inString = false;
+               $inComment = false;
+               $multiline = false;
+
+               for ( $idx = 0; $idx < $maxLen; $idx++ ) {
+                       switch ( $str[$idx] ) {
+                               case '\\':
+                                       if ( $inString ) {
+                                               // Skip the escaped character so that \" cannot end
+                                               // the string and \\ cannot escape the closing quote
+                                               $idx++;
+                                       }
+                                       break;
+
+                               case '"':
+                                       if ( !$inComment ) {
+                                               // Either started or ended a string
+                                               $inString = !$inString;
+                                       }
+                                       break;
+
+                               case '/':
+                                       if ( $inString ) {
+                                               // A slash inside a string never starts or ends a
+                                               // comment. Use break, not continue: continue inside
+                                               // a switch acts like break and warns on PHP 7.3+.
+                                               break;
+                                       }
+                                       $lookAhead = ( $idx + 1 < $maxLen ) ? $str[$idx + 1] : '';
+                                       if ( !$inComment &&
+                                               ( $lookAhead === '/' || $lookAhead === '*' )
+                                       ) {
+                                               // Transition into a comment
+                                               // Add characters seen to buffer
+                                               $buffer .= substr( $str, $mark, $idx - $mark );
+                                               $commentStart = $idx;
+                                               // Consume the look ahead character
+                                               $idx++;
+                                               // Track state
+                                               $inComment = true;
+                                               $multiline = $lookAhead === '*';
+                                       } elseif ( $multiline &&
+                                               $idx - $commentStart > 2 &&
+                                               $str[$idx - 1] === '*'
+                                       ) {
+                                               // Found the end of the current comment. The distance
+                                               // check keeps the "*" of the opening "/*" from being
+                                               // reused, so "/*/" does not close the comment.
+                                               $mark = $idx + 1;
+                                               $inComment = false;
+                                               $multiline = false;
+                                       }
+                                       break;
+
+                               case "\n":
+                                       if ( $inComment && !$multiline ) {
+                                               // Found the end of the current comment
+                                               $mark = $idx + 1;
+                                               $inComment = false;
+                                       }
+                                       break;
+                       }
+               }
+               if ( $inComment ) {
+                       // Comment ends with input
+                       // Technically we should check to ensure that we aren't in
+                       // a multiline comment that hasn't been properly ended, but this
+                       // is a strip filter, not a validating parser.
+                       $mark = $maxLen;
+               }
+               // Add final chunk to buffer before returning
+               return $buffer . substr( $str, $mark, $maxLen - $mark );
+       }
 }
index af68ab0..456266f 100644 (file)
@@ -222,6 +222,64 @@ class FormatJsonTest extends MediaWikiTestCase {
                $this->assertFalse( $st->isOK() );
        }
 
+       /**
+        * Data provider for testStripComments().
+        *
+        * @return array[] Each entry is array( input string, expected output )
+        */
+       public function provideStripComments() {
+               return array(
+                       array( '{"a":"b"}', '{"a":"b"}' ), // no comment at all
+                       array( "{\"a\":\"b\"}\n", "{\"a\":\"b\"}\n" ), // trailing newline is kept
+                       array( '/*c*/{"c":"b"}', '{"c":"b"}' ), // leading multiline comment
+                       array( '{"a":"c"}/*c*/', '{"a":"c"}' ), // trailing multiline comment
+                       array( '/*c//d*/{"c":"b"}', '{"c":"b"}' ), // "//" inside a multiline comment
+                       array( '{/*c*/"c":"b"}', '{"c":"b"}' ), // multiline comment inside an object
+                       array( "/*\nc\r\n*/{\"c\":\"b\"}", '{"c":"b"}' ), // comment spanning LF and CRLF
+                       array( "//c\n{\"c\":\"b\"}", '{"c":"b"}' ), // single line comment ended by LF
+                       array( "//c\r\n{\"c\":\"b\"}", '{"c":"b"}' ), // single line comment ended by CRLF
+                       array( '{"a":"c"}//c', '{"a":"c"}' ), // single line comment ended by end of input
+                       array( "{\"a-c\"://c\n\"b\"}", '{"a-c":"b"}' ), // comment between key and value
+                       array( '{"/*a":"b"}', '{"/*a":"b"}' ), // "/*" inside a string is kept
+                       array( '{"a":"//b"}', '{"a":"//b"}' ), // "//" inside a string is kept
+                       array( '{"a":"b/*c*/"}', '{"a":"b/*c*/"}' ), // whole comment inside a string is kept
+                       array( "{\"\\\"/*a\":\"b\"}", "{\"\\\"/*a\":\"b\"}" ), // escaped quote does not end the string
+                       array( '', '' ), // empty input
+                       array( '/*c', '' ), // unterminated multiline comment
+                       array( '//c', '' ), // input that is only a single line comment
+                       array( '"http://example.com"', '"http://example.com"' ), // URL inside a string
+                       array( "\0", "\0" ), // NUL byte passes through
+                       array( '"Blåbærsyltetøy"', '"Blåbærsyltetøy"' ), // non-ASCII text passes through
+               );
+       }
+
+       /**
+        * Check that stripComments() turns each input into exactly the expected
+        * string.
+        *
+        * @covers FormatJson::stripComments
+        * @dataProvider provideStripComments
+        * @param string $json Input that may contain comments
+        * @param string $expect Exact string expected after stripping
+        */
+       public function testStripComments( $json, $expect ) {
+               $stripped = FormatJson::stripComments( $json );
+               $this->assertSame( $expect, $stripped );
+       }
+
+       /**
+        * Data provider for testParseStripComments().
+        *
+        * @return array[] Each entry is array( JSON input, expected decoded value )
+        */
+       public function provideParseStripComments() {
+               return array(
+                       array( '/* blah */true', true ), // multiline comment before a scalar
+                       array( "// blah \ntrue", true ), // single line comment before a scalar
+                       array( '[ "a" , /* blah */ "b" ]', array( 'a', 'b' ) ), // comment between list items
+               );
+       }
+
+       /**
+        * Check that parse() accepts commented input when the STRIP_COMMENTS
+        * option is set and yields the expected value.
+        *
+        * @covers FormatJson::parse
+        * @covers FormatJson::stripComments
+        * @dataProvider provideParseStripComments
+        * @param string $json JSON text containing comments
+        * @param mixed $expect Value the parse result should hold
+        */
+       public function testParseStripComments( $json, $expect ) {
+               $options = FormatJson::STRIP_COMMENTS;
+               $status = FormatJson::parse( $json, $options );
+               $this->assertType( 'Status', $status );
+               $this->assertTrue( $status->isGood() );
+               $this->assertEquals( $expect, $status->getValue() );
+       }
+
        /**
         * Generate a set of test cases for a particular combination of encoder options.
         *