StripState testing and cleanup
authorTim Starling <tstarling@wikimedia.org>
Wed, 28 Feb 2018 09:42:40 +0000 (20:42 +1100)
committerTim Starling <tstarling@wikimedia.org>
Mon, 5 Mar 2018 05:43:58 +0000 (16:43 +1100)
* Added StripState unit tests
* Deprecated unmaintained "half-parsed" serialization experiment
* Renamed some variables for brevity and removed unused "prefix"

Change-Id: I838d7ac7f9a2189e13d39c6939dba5d70e74a6b7

RELEASE-NOTES-1.31
includes/parser/Parser.php
includes/parser/StripState.php
tests/phpunit/includes/parser/StripStateTest.php [new file with mode: 0644]

index 5e9ce8b..83669e6 100644 (file)
@@ -279,6 +279,12 @@ changes to languages because of Phabricator reports.
   * ::setExtendedLoginCookie()
   Note that User::setCookies() remains, and is not deprecated.
 * The global functions wfProfileIn and wfProfileOut, deprecated in 1.25, have been removed.
+* The following methods related to caching of half-parsed HTML were deprecated:
+  * Parser::serializeHalfParsedText()
+  * Parser::unserializeHalfParsedText()
+  * Parser::isValidHalfParsedText()
+  * StripState::getSubState()
+  * StripState::merge()
 
 == Compatibility ==
 MediaWiki 1.31 requires PHP 5.5.9 or later. Although HHVM 3.18.5 or later is supported,
index c775cd9..8e5dcbd 100644 (file)
@@ -5998,11 +5998,13 @@ class Parser {
         * unserializeHalfParsedText(). The text can then be safely incorporated into
         * the return value of a parser hook.
         *
+        * @deprecated since 1.31
         * @param string $text
         *
         * @return array
         */
        public function serializeHalfParsedText( $text ) {
+               wfDeprecated( __METHOD__, '1.31' );
                $data = [
                        'text' => $text,
                        'version' => self::HALF_PARSED_VERSION,
@@ -6023,11 +6025,13 @@ class Parser {
         * If the $data array has been stored persistently, the caller should first
         * check whether it is still valid, by calling isValidHalfParsedText().
         *
+        * @deprecated since 1.31
         * @param array $data Serialized data
         * @throws MWException
         * @return string
         */
        public function unserializeHalfParsedText( $data ) {
+               wfDeprecated( __METHOD__, '1.31' );
                if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
                        throw new MWException( __METHOD__ . ': invalid version' );
                }
@@ -6048,11 +6052,13 @@ class Parser {
         * serializeHalfParsedText(), is compatible with the current version of the
         * parser.
         *
+        * @deprecated since 1.31
         * @param array $data
         *
         * @return bool
         */
        public function isValidHalfParsedText( $data ) {
+               wfDeprecated( __METHOD__, '1.31' );
                return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
        }
 
index 9f064df..d329f69 100644 (file)
  * @ingroup Parser
  */
 class StripState {
-       protected $prefix;
        protected $data;
        protected $regex;
 
        protected $parser;
 
        protected $circularRefGuard;
-       protected $recursionLevel = 0;
-       protected $highestRecursionLevel = 0;
+       protected $depth = 0;
+       protected $highestDepth = 0;
        protected $expandSize = 0;
 
-       const UNSTRIP_RECURSION_LIMIT = 20;
-       const UNSTRIP_SIZE_LIMIT = 5000000;
+       protected $depthLimit = 20;
+       protected $sizeLimit = 5000000;
 
        /**
         * @param Parser|null $parser
+        * @param array $options
         */
-       public function __construct( Parser $parser = null ) {
+       public function __construct( Parser $parser = null, $options = [] ) {
                $this->data = [
                        'nowiki' => [],
                        'general' => []
@@ -51,6 +51,13 @@ class StripState {
                $this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/';
                $this->circularRefGuard = [];
                $this->parser = $parser;
+
+               if ( isset( $options['depthLimit'] ) ) {
+                       $this->depthLimit = $options['depthLimit'];
+               }
+               if ( isset( $options['sizeLimit'] ) ) {
+                       $this->sizeLimit = $options['sizeLimit'];
+               }
        }
 
        /**
@@ -128,12 +135,11 @@ class StripState {
                                        return $this->getWarning( 'parser-unstrip-loop-warning' );
                                }
 
-                               if ( $this->recursionLevel > $this->highestRecursionLevel ) {
-                                       $this->highestRecursionLevel = $this->recursionLevel;
+                               if ( $this->depth > $this->highestDepth ) {
+                                       $this->highestDepth = $this->depth;
                                }
-                               if ( $this->recursionLevel >= self::UNSTRIP_RECURSION_LIMIT ) {
-                                       return $this->getLimitationWarning( 'unstrip-depth',
-                                               self::UNSTRIP_RECURSION_LIMIT );
+                               if ( $this->depth >= $this->depthLimit ) {
+                                       return $this->getLimitationWarning( 'unstrip-depth', $this->depthLimit );
                                }
 
                                $value = $this->data[$type][$marker];
@@ -142,15 +148,14 @@ class StripState {
                                }
 
                                $this->expandSize += strlen( $value );
-                               if ( $this->expandSize > self::UNSTRIP_SIZE_LIMIT ) {
-                                       return $this->getLimitationWarning( 'unstrip-size',
-                                               self::UNSTRIP_SIZE_LIMIT );
+                               if ( $this->expandSize > $this->sizeLimit ) {
+                                       return $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit );
                                }
 
                                $this->circularRefGuard[$marker] = true;
-                               $this->recursionLevel++;
+                               $this->depth++;
                                $ret = $this->unstripType( $type, $value );
-                               $this->recursionLevel--;
+                               $this->depth--;
                                unset( $this->circularRefGuard[$marker] );
 
                                return $ret;
@@ -201,14 +206,14 @@ class StripState {
                return [
                        [ 'limitreport-unstrip-depth',
                                [
-                                       $this->highestRecursionLevel,
-                                       self::UNSTRIP_RECURSION_LIMIT
+                                       $this->highestDepth,
+                                       $this->depthLimit
                                ],
                        ],
                        [ 'limitreport-unstrip-size',
                                [
                                        $this->expandSize,
-                                       self::UNSTRIP_SIZE_LIMIT
+                                       $this->sizeLimit
                                ],
                        ]
                ];
@@ -218,11 +223,13 @@ class StripState {
         * Get a StripState object which is sufficient to unstrip the given text.
         * It will contain the minimum subset of strip items necessary.
         *
+        * @deprecated since 1.31
         * @param string $text
-        *
         * @return StripState
         */
        public function getSubState( $text ) {
+               wfDeprecated( __METHOD__, '1.31' );
+
                $subState = new StripState;
                $pos = 0;
                while ( true ) {
@@ -254,11 +261,14 @@ class StripState {
         * will not be preserved. The strings in the $texts array will have their
         * strip markers rewritten, the resulting array of strings will be returned.
         *
+        * @deprecated since 1.31
         * @param StripState $otherState
         * @param array $texts
         * @return array
         */
        public function merge( $otherState, $texts ) {
+               wfDeprecated( __METHOD__, '1.31' );
+
                $mergePrefix = wfRandomString( 16 );
 
                foreach ( $otherState->data as $type => $items ) {
diff --git a/tests/phpunit/includes/parser/StripStateTest.php b/tests/phpunit/includes/parser/StripStateTest.php
new file mode 100644 (file)
index 0000000..0f4f6e0
--- /dev/null
@@ -0,0 +1,136 @@
+<?php
+
+/**
+ * @covers StripState
+ */
+class StripStateTest extends MediaWikiTestCase {
+       public function setUp() {
+               parent::setUp();
+               $this->setContentLang( 'qqx' );
+       }
+
+       private function getMarker() {
+               static $i;
+               return Parser::MARKER_PREFIX . '-blah-' . sprintf( '%08X', $i++ ) . Parser::MARKER_SUFFIX;
+       }
+
+       private static function getWarning( $message, $max = '' ) {
+               return "<span class=\"error\">($message: $max)</span>";
+       }
+
+       public function testAddNoWiki() {
+               $ss = new StripState;
+               $marker = $this->getMarker();
+               $ss->addNoWiki( $marker, '<>' );
+               $text = "x{$marker}y";
+               $text = $ss->unstripGeneral( $text );
+               $text = str_replace( '<', '', $text );
+               $text = $ss->unstripNoWiki( $text );
+               $this->assertSame( 'x<>y', $text );
+       }
+
+       public function testAddGeneral() {
+               $ss = new StripState;
+               $marker = $this->getMarker();
+               $ss->addGeneral( $marker, '<>' );
+               $text = "x{$marker}y";
+               $text = $ss->unstripNoWiki( $text );
+               $text = str_replace( '<', '', $text );
+               $text = $ss->unstripGeneral( $text );
+               $this->assertSame( 'x<>y', $text );
+       }
+
+       public function testUnstripBoth() {
+               $ss = new StripState;
+               $mk1 = $this->getMarker();
+               $mk2 = $this->getMarker();
+               $ss->addNoWiki( $mk1, '<1>' );
+               $ss->addGeneral( $mk2, '<2>' );
+               $text = "x{$mk1}{$mk2}y";
+               $text = str_replace( '<', '', $text );
+               $text = $ss->unstripBoth( $text );
+               $this->assertSame( 'x<1><2>y', $text );
+       }
+
+       public static function provideUnstripRecursive() {
+               return [
+                       [ 0, 'text' ],
+                       [ 1, '=text=' ],
+                       [ 2, '==text==' ],
+                       [ 3, '==' . self::getWarning( 'unstrip-depth-warning', 2 ) . '==' ],
+               ];
+       }
+
+       /** @dataProvider provideUnstripRecursive */
+       public function testUnstripRecursive( $depth, $expected ) {
+               $ss = new StripState( null, [ 'depthLimit' => 2 ] );
+               $text = 'text';
+               for ( $i = 0; $i < $depth; $i++ ) {
+                       $mk = $this->getMarker();
+                       $ss->addNoWiki( $mk, "={$text}=" );
+                       $text = $mk;
+               }
+               $text = $ss->unstripNoWiki( $text );
+               $this->assertSame( $expected, $text );
+       }
+
+       public function testUnstripLoop() {
+               $ss = new StripState( null, [ 'depthLimit' => 2 ] );
+               $mk = $this->getMarker();
+               $ss->addNoWiki( $mk, $mk );
+               $text = $ss->unstripNoWiki( $mk );
+               $this->assertSame( self::getWarning( 'parser-unstrip-loop-warning' ), $text );
+       }
+
+       public static function provideUnstripSize() {
+               return [
+                       [ 0, 'x' ],
+                       [ 1, 'xx' ],
+                       [ 2, str_repeat( self::getWarning( 'unstrip-size-warning', 5 ), 2 ) ]
+               ];
+       }
+
+       /** @dataProvider provideUnstripSize */
+       public function testUnstripSize( $depth, $expected ) {
+               $ss = new StripState( null, [ 'sizeLimit' => 5 ] );
+               $text = 'x';
+               for ( $i = 0; $i < $depth; $i++ ) {
+                       $mk = $this->getMarker();
+                       $ss->addNoWiki( $mk, $text );
+                       $text = "$mk$mk";
+               }
+               $text = $ss->unstripNoWiki( $text );
+               $this->assertSame( $expected, $text );
+       }
+
+       public function provideGetLimitReport() {
+               for ( $i = 1; $i < 4; $i++ ) {
+                       yield [ $i ];
+               }
+       }
+
+       /** @dataProvider provideGetLimitReport */
+       public function testGetLimitReport( $depth ) {
+               $sizeLimit = 100000;
+               $ss = new StripState( null, [ 'depthLimit' => 5, 'sizeLimit' => $sizeLimit ] );
+               $text = 'x';
+               for ( $i = 0; $i < $depth; $i++ ) {
+                       $mk = $this->getMarker();
+                       $ss->addNoWiki( $mk, $text );
+                       $text = "$mk$mk";
+               }
+               $text = $ss->unstripNoWiki( $text );
+               $report = $ss->getLimitReport();
+               $messages = [];
+               foreach ( $report as list( $msg, $params ) ) {
+                       $messages[$msg] = $params;
+               }
+               $this->assertSame( [ $depth - 1, 5 ], $messages['limitreport-unstrip-depth'] );
+               $this->assertSame(
+                       [
+                               strlen( $this->getMarker() ) * 2 * ( pow( 2, $depth ) - 2 ) + pow( 2, $depth ),
+                               $sizeLimit
+                       ],
+                       $messages['limitreport-unstrip-size' ] );
+       }
+}