Add 'unwrap' ParserOutput post-cache transform
authorBrad Jorsch <bjorsch@wikimedia.org>
Fri, 22 Dec 2017 18:32:49 +0000 (13:32 -0500)
committerGergő Tisza <tgr.huwiki@gmail.com>
Thu, 1 Feb 2018 22:24:27 +0000 (14:24 -0800)
And deprecate passing false for ParserOptions::setWrapOutputClass().

There are three cases for the Parser wrapper: the default
mw-parser-output, a custom wrapper, or no wrapper. As things currently
stand, we have to fragment the parser cache on each of these options,
which uses a nontrival amount of storage space (T167784).

Ideally we'd do all the wrapping as a post-cache transform, but
TemplateStyles needs to know the wrapper in use in order to properly
prefix its CSS rules (that's why we added the wrapper in the first
place). So, second best option is to make *un*wrapping be a post-cache
transform and make "custom wrapper" be uncacheable.

This patch does the first bit (unwrapping as a post-cache transform),
and a followup will do the second part once the deprecation process is
satisfied.

Bug: T181846
Change-Id: Iba16e78c41be992467101e7d83e9c3134765b101

12 files changed:
RELEASE-NOTES-1.31
includes/Message.php
includes/api/ApiParse.php
includes/cache/MessageCache.php
includes/installer/Installer.php
includes/parser/ParserOptions.php
includes/parser/ParserOutput.php
tests/parser/ParserTestRunner.php
tests/phpunit/includes/ExtraParserTest.php
tests/phpunit/includes/parser/ParserOptionsTest.php
tests/phpunit/includes/parser/ParserOutputTest.php
tests/phpunit/includes/parser/TagHooksTest.php

index 5314925..ad24852 100644 (file)
@@ -194,6 +194,8 @@ changes to languages because of Phabricator reports.
   Setting template variables by reference allowed violating the principle of data being
   immutable once added to the skin template. In practice, this method was not being
   used for that. Rather, setRef() existed as memory optimisation for PHP 4.
+* Passing false to ParserOptions::setWrapOutputClass() is deprecated. Use the
+  'unwrap' transform to ParserOutput::getText() instead.
 
 == Compatibility ==
 MediaWiki 1.31 requires PHP 5.5.9 or later. Although HHVM 3.18.5 or later is supported,
index e55eaaf..fac9a59 100644 (file)
@@ -1245,7 +1245,14 @@ class Message implements MessageSpecifier, Serializable {
                );
 
                return $out instanceof ParserOutput
-                       ? $out->getText( [ 'enableSectionEditLinks' => false ] )
+                       ? $out->getText( [
+                               'enableSectionEditLinks' => false,
+                               // Wrapping messages in an extra <div> is probably not expected. If
+                               // they're outside the content area they probably shouldn't be
+                               // targeted by CSS that's targeting the parser output, and if
+                               // they're inside they already are from the outer div.
+                               'unwrap' => true,
+                       ] )
                        : $out;
        }
 
index cf1fd1e..2839ab9 100644 (file)
@@ -344,6 +344,7 @@ class ApiParse extends ApiBase {
                        $result_array['text'] = $p_result->getText( [
                                'allowTOC' => !$params['disabletoc'],
                                'enableSectionEditLinks' => !$params['disableeditsection'],
+                               'unwrap' => $params['wrapoutputclass'] === '',
                        ] );
                        $result_array[ApiResult::META_BC_SUBELEMENTS][] = 'text';
                }
@@ -538,9 +539,9 @@ class ApiParse extends ApiBase {
                if ( $params['disabletidy'] ) {
                        $popts->setTidy( false );
                }
-               $popts->setWrapOutputClass(
-                       $params['wrapoutputclass'] === '' ? false : $params['wrapoutputclass']
-               );
+               if ( $params['wrapoutputclass'] !== '' ) {
+                       $popts->setWrapOutputClass( $params['wrapoutputclass'] );
+               }
 
                $reset = null;
                $suppressCache = false;
index c9615b1..63c03af 100644 (file)
@@ -193,7 +193,6 @@ class MessageCache {
                                $po = ParserOptions::newFromAnon();
                                $po->setEditSection( false );
                                $po->setAllowUnsafeRawHtml( false );
-                               $po->setWrapOutputClass( false );
                                return $po;
                        }
 
@@ -203,11 +202,6 @@ class MessageCache {
                        // from malicious sources. As a precaution, disable
                        // the <html> parser tag when parsing messages.
                        $this->mParserOptions->setAllowUnsafeRawHtml( false );
-                       // Wrapping messages in an extra <div> is probably not expected. If
-                       // they're outside the content area they probably shouldn't be
-                       // targeted by CSS that's targeting the parser output, and if
-                       // they're inside they already are from the outer div.
-                       $this->mParserOptions->setWrapOutputClass( false );
                }
 
                return $this->mParserOptions;
index 5e018e0..e42146d 100644 (file)
@@ -447,7 +447,6 @@ abstract class Installer {
                $this->parserTitle = Title::newFromText( 'Installer' );
                $this->parserOptions = new ParserOptions( $wgUser ); // language will be wrong :(
                $this->parserOptions->setEditSection( false );
-               $this->parserOptions->setWrapOutputClass( false );
                // Don't try to access DB before user language is initialised
                $this->setParserLanguage( Language::factory( 'en' ) );
        }
@@ -689,6 +688,7 @@ abstract class Installer {
                        $out = $wgParser->parse( $text, $this->parserTitle, $this->parserOptions, $lineStart );
                        $html = $out->getText( [
                                'enableSectionEditLinks' => false,
+                               'unwrap' => true,
                        ] );
                } catch ( MediaWiki\Services\ServiceDisabledException $e ) {
                        $html = '<!--DB access attempted during parse-->  ' . htmlspecialchars( $text );
index 2f284af..1405c45 100644 (file)
@@ -781,6 +781,7 @@ class ParserOptions {
         * CSS class to use to wrap output from Parser::parse()
         * @since 1.30
         * @param string|bool $className Set false to disable wrapping.
+        *   Passing false is deprecated since MediaWiki 1.31
         * @return string|bool Current value
         */
        public function setWrapOutputClass( $className ) {
index 153a770..8f7ed24 100644 (file)
  */
 class ParserOutput extends CacheTime {
        /**
-        * Feature flag to indicate to extensions that MediaWiki core supports and
+        * Feature flags to indicate to extensions that MediaWiki core supports and
         * uses getText() stateless transforms.
         */
        const SUPPORTS_STATELESS_TRANSFORMS = 1;
+       const SUPPORTS_UNWRAP_TRANSFORM = 1;
 
        /**
         * @var string $mText The output text
@@ -266,8 +267,9 @@ class ParserOutput extends CacheTime {
         *     to generate one and `__NOTOC__` wasn't used. Default is true,
         *     but might be statefully overridden.
         *  - enableSectionEditLinks: (bool) Include section edit links, assuming
-        *    section edit link tokens are present in the HTML. Default is true,
+        *     section edit link tokens are present in the HTML. Default is true,
         *     but might be statefully overridden.
+        *  - unwrap: (bool) Remove a wrapping mw-parser-output div. Default is false.
         * @return string HTML
         */
        public function getText( $options = [] ) {
@@ -284,11 +286,25 @@ class ParserOutput extends CacheTime {
                        // In that situation, the historical behavior (possibly buggy) is to remove the TOC.
                        'allowTOC' => !empty( $this->mTOCEnabled ),
                        'enableSectionEditLinks' => $this->mEditSectionTokens,
+                       'unwrap' => false,
                ];
                $text = $this->mText;
 
                Hooks::runWithoutAbort( 'ParserOutputPostCacheTransform', [ $this, &$text, &$options ] );
 
+               if ( $options['unwrap'] !== false ) {
+                       $start = Html::openElement( 'div', [
+                               'class' => 'mw-parser-output'
+                       ] );
+                       $startLen = strlen( $start );
+                       $end = Html::closeElement( 'div' );
+                       $endLen = strlen( $end );
+
+                       if ( substr( $text, 0, $startLen ) === $start && substr( $text, -$endLen ) === $end ) {
+                               $text = substr( $text, $startLen, -$endLen );
+                       }
+               }
+
                if ( $options['enableSectionEditLinks'] ) {
                        $text = preg_replace_callback(
                                self::EDITSECTION_REGEX,
index 9b5897c..4dd4bc6 100644 (file)
@@ -811,10 +811,6 @@ class ParserTestRunner {
                $options = ParserOptions::newFromContext( $context );
                $options->setTimestamp( $this->getFakeTimestamp() );
 
-               if ( !isset( $opts['wrap'] ) ) {
-                       $options->setWrapOutputClass( false );
-               }
-
                if ( isset( $opts['tidy'] ) ) {
                        if ( !$this->tidySupport->isEnabled() ) {
                                $this->recorder->skipped( $test, 'tidy extension is not installed' );
@@ -854,7 +850,8 @@ class ParserTestRunner {
                } else {
                        $output = $parser->parse( $test['input'], $title, $options, true, true, 1337 );
                        $out = $output->getText( [
-                               'allowTOC' => !isset( $opts['notoc'] )
+                               'allowTOC' => !isset( $opts['notoc'] ),
+                               'unwrap' => !isset( $opts['wrap'] ),
                        ] );
                        if ( isset( $opts['tidy'] ) ) {
                                $out = preg_replace( '/\s+$/', '', $out );
index aaa135d..75ebd31 100644 (file)
@@ -26,7 +26,6 @@ class ExtraParserTest extends MediaWikiTestCase {
                // FIXME: This test should pass without setting global content language
                $this->options = ParserOptions::newFromUserAndLang( new User, $contLang );
                $this->options->setTemplateCallback( [ __CLASS__, 'statelessFetchTemplate' ] );
-               $this->options->setWrapOutputClass( false );
                $this->parser = new Parser;
 
                MagicWord::clearCache();
@@ -41,9 +40,8 @@ class ExtraParserTest extends MediaWikiTestCase {
 
                $title = Title::newFromText( 'Unit test' );
                $options = ParserOptions::newFromUser( new User() );
-               $options->setWrapOutputClass( false );
                $this->assertEquals( "<p>$longLine</p>",
-                       $this->parser->parse( $longLine, $title, $options )->getText() );
+                       $this->parser->parse( $longLine, $title, $options )->getText( [ 'unwrap' => true ] ) );
        }
 
        /**
@@ -55,7 +53,7 @@ class ExtraParserTest extends MediaWikiTestCase {
                $parserOutput = $this->parser->parse( "Test\n{{Foo}}\n{{Bar}}", $title, $this->options );
                $this->assertEquals(
                        "<p>Test\nContent of <i>Template:Foo</i>\nContent of <i>Template:Bar</i>\n</p>",
-                       $parserOutput->getText()
+                       $parserOutput->getText( [ 'unwrap' => true ] )
                );
        }
 
index 93ab35c..232b0bb 100644 (file)
@@ -62,7 +62,7 @@ class ParserOptionsTest extends MediaWikiTestCase {
                        'No overrides' => [ true, [] ],
                        'In-key options are ok' => [ true, [
                                'thumbsize' => 1e100,
-                               'wrapclass' => false,
+                               'printable' => false,
                        ] ],
                        'Non-in-key options are not ok' => [ false, [
                                'removeComments' => false,
@@ -102,7 +102,7 @@ class ParserOptionsTest extends MediaWikiTestCase {
        }
 
        public static function provideOptionsHash() {
-               $used = [ 'wrapclass', 'printable' ];
+               $used = [ 'thumbsize', 'printable' ];
 
                $classWrapper = TestingAccessWrapper::newFromClass( ParserOptions::class );
                $classWrapper->getDefaults();
@@ -116,9 +116,9 @@ class ParserOptionsTest extends MediaWikiTestCase {
                        'Canonical options, used some options' => [ $used, 'canonical', [] ],
                        'Used some options, non-default values' => [
                                $used,
-                               'printable=1!wrapclass=foobar',
+                               'printable=1!thumbsize=200',
                                [
-                                       'wrapclass' => 'foobar',
+                                       'thumbsize' => 200,
                                        'printable' => true,
                                ]
                        ],
index 9642bbc..8cc7ba1 100644 (file)
@@ -125,7 +125,7 @@ class ParserOutputTest extends MediaWikiTestCase {
        public static function provideGetText() {
                // phpcs:disable Generic.Files.LineLength
                $text = <<<EOF
-<p>Test document.
+<div class="mw-parser-output"><p>Test document.
 </p>
 <mw:toc><div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
 <ul>
@@ -150,13 +150,13 @@ class ParserOutputTest extends MediaWikiTestCase {
 </p>
 <h2><span class="mw-headline" id="Section_3">Section 3</span><mw:editsection page="Test Page" section="4">Section 3</mw:editsection></h2>
 <p>Three
-</p>
+</p></div>
 EOF;
 
                return [
                        'No stateless options, default state' => [
                                [], [], $text, <<<EOF
-<p>Test document.
+<div class="mw-parser-output"><p>Test document.
 </p>
 <div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
 <ul>
@@ -181,12 +181,12 @@ EOF;
 </p>
 <h2><span class="mw-headline" id="Section_3">Section 3</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=4" title="Edit section: Section 3">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 <p>Three
-</p>
+</p></div>
 EOF
                        ],
                        'No stateless options, TOC statefully disabled' => [
                                [], [ 'mTOCEnabled' => false ], $text, <<<EOF
-<p>Test document.
+<div class="mw-parser-output"><p>Test document.
 </p>
 
 <h2><span class="mw-headline" id="Section_1">Section 1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=1" title="Edit section: Section 1">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
@@ -200,12 +200,12 @@ EOF
 </p>
 <h2><span class="mw-headline" id="Section_3">Section 3</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=4" title="Edit section: Section 3">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 <p>Three
-</p>
+</p></div>
 EOF
                        ],
                        'No stateless options, section edits statefully disabled' => [
                                [], [ 'mEditSectionTokens' => false ], $text, <<<EOF
-<p>Test document.
+<div class="mw-parser-output"><p>Test document.
 </p>
 <div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
 <ul>
@@ -230,14 +230,14 @@ EOF
 </p>
 <h2><span class="mw-headline" id="Section_3">Section 3</span></h2>
 <p>Three
-</p>
+</p></div>
 EOF
                        ],
                        'Stateless options override stateful settings' => [
                                [ 'allowTOC' => true, 'enableSectionEditLinks' => true ],
                                [ 'mTOCEnabled' => false, 'mEditSectionTokens' => false ],
                                $text, <<<EOF
-<p>Test document.
+<div class="mw-parser-output"><p>Test document.
 </p>
 <div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
 <ul>
@@ -262,12 +262,12 @@ EOF
 </p>
 <h2><span class="mw-headline" id="Section_3">Section 3</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=4" title="Edit section: Section 3">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 <p>Three
-</p>
+</p></div>
 EOF
                        ],
                        'Statelessly disable section edit links' => [
                                [ 'enableSectionEditLinks' => false ], [], $text, <<<EOF
-<p>Test document.
+<div class="mw-parser-output"><p>Test document.
 </p>
 <div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
 <ul>
@@ -292,13 +292,43 @@ EOF
 </p>
 <h2><span class="mw-headline" id="Section_3">Section 3</span></h2>
 <p>Three
-</p>
+</p></div>
 EOF
                        ],
                        'Statelessly disable TOC' => [
                                [ 'allowTOC' => false ], [], $text, <<<EOF
+<div class="mw-parser-output"><p>Test document.
+</p>
+
+<h2><span class="mw-headline" id="Section_1">Section 1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=1" title="Edit section: Section 1">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>One
+</p>
+<h2><span class="mw-headline" id="Section_2">Section 2</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=2" title="Edit section: Section 2">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>Two
+</p>
+<h3><span class="mw-headline" id="Section_2.1">Section 2.1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=3" title="Edit section: Section 2.1">edit</a><span class="mw-editsection-bracket">]</span></span></h3>
+<p>Two point one
+</p>
+<h2><span class="mw-headline" id="Section_3">Section 3</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=4" title="Edit section: Section 3">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>Three
+</p></div>
+EOF
+                       ],
+                       'Statelessly unwrap text' => [
+                               [ 'unwrap' => true ], [], $text, <<<EOF
 <p>Test document.
 </p>
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Section_1"><span class="tocnumber">1</span> <span class="toctext">Section 1</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#Section_2"><span class="tocnumber">2</span> <span class="toctext">Section 2</span></a>
+<ul>
+<li class="toclevel-2 tocsection-3"><a href="#Section_2.1"><span class="tocnumber">2.1</span> <span class="toctext">Section 2.1</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1 tocsection-4"><a href="#Section_3"><span class="tocnumber">3</span> <span class="toctext">Section 3</span></a></li>
+</ul>
+</div>
 
 <h2><span class="mw-headline" id="Section_1">Section 1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=1" title="Edit section: Section 1">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 <p>One
@@ -314,6 +344,9 @@ EOF
 </p>
 EOF
                        ],
+                       'Unwrap without a mw-parser-output wrapper' => [
+                               [ 'unwrap' => true ], [], '<div class="foobar">Content</div>', '<div class="foobar">Content</div>'
+                       ],
                ];
                // phpcs:enable
        }
index 7e31cba..2fdaa18 100644 (file)
@@ -46,7 +46,6 @@ class TagHooksTest extends MediaWikiTestCase {
        private function getParserOptions() {
                global $wgContLang;
                $popt = ParserOptions::newFromUserAndLang( new User, $wgContLang );
-               $popt->setWrapOutputClass( false );
                return $popt;
        }
 
@@ -63,7 +62,7 @@ class TagHooksTest extends MediaWikiTestCase {
                        Title::newFromText( 'Test' ),
                        $this->getParserOptions()
                );
-               $this->assertEquals( "<p>FooOneBaz\n</p>", $parserOutput->getText() );
+               $this->assertEquals( "<p>FooOneBaz\n</p>", $parserOutput->getText( [ 'unwrap' => true ] ) );
 
                $parser->mPreprocessor = null; # Break the Parser <-> Preprocessor cycle
        }
@@ -98,7 +97,7 @@ class TagHooksTest extends MediaWikiTestCase {
                        Title::newFromText( 'Test' ),
                        $this->getParserOptions()
                );
-               $this->assertEquals( "<p>FooOneBaz\n</p>", $parserOutput->getText() );
+               $this->assertEquals( "<p>FooOneBaz\n</p>", $parserOutput->getText( [ 'unwrap' => true ] ) );
 
                $parser->mPreprocessor = null; # Break the Parser <-> Preprocessor cycle
        }