From 6db35b3c98ee807c71d8187ea45d69999df63b38 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Wed, 17 Oct 2018 10:05:02 -0400 Subject: [PATCH] Remove most support for configuring Tidy, including Raggett Remex is pure PHP so there is no reason to use an external tidy any more. Configuration variables and implementation classes were deprecated in 1.32 or earlier. We've kept only $wgTidyConfig which can be used for experimental features or debugging Remex. Bug: T198214 Change-Id: I99d48f858d97b6e1d1e6cd76a42c960cc2c61f9f --- .phpcs.xml | 1 - RELEASE-NOTES-1.33 | 8 ++ includes/DefaultSettings.php | 80 +++----------- includes/parser/MWTidy.php | 64 ++--------- includes/tidy/RaggettBase.php | 60 ----------- includes/tidy/RaggettExternal.php | 76 ------------- includes/tidy/RaggettInternalHHVM.php | 32 ------ includes/tidy/RaggettInternalPHP.php | 55 ---------- includes/tidy/RaggettWrapper.php | 101 ------------------ includes/tidy/tidy.conf | 24 ----- maintenance/dictionary/mediawiki.dic | 1 - tests/common/TestsAutoLoader.php | 1 - tests/parser/ParserTestRunner.php | 21 +--- tests/parser/TidySupport.php | 77 ------------- .../phpunit/includes/parser/SanitizerTest.php | 2 + 15 files changed, 37 insertions(+), 566 deletions(-) delete mode 100644 includes/tidy/RaggettBase.php delete mode 100644 includes/tidy/RaggettExternal.php delete mode 100644 includes/tidy/RaggettInternalHHVM.php delete mode 100644 includes/tidy/RaggettInternalPHP.php delete mode 100644 includes/tidy/RaggettWrapper.php delete mode 100644 includes/tidy/tidy.conf delete mode 100644 tests/parser/TidySupport.php diff --git a/.phpcs.xml b/.phpcs.xml index 2bce5b2a59..c0154c7543 100644 --- a/.phpcs.xml +++ b/.phpcs.xml @@ -323,7 +323,6 @@ includes/export/DumpPipeOutput\.php includes/resourceloader/ResourceLoaderImage\.php includes/shell/Command\.php - includes/tidy/RaggettExternal\.php maintenance/dumpTextPass\.php maintenance/mysql\.php maintenance/storage/recompressTracked\.php diff --git 
a/RELEASE-NOTES-1.33 b/RELEASE-NOTES-1.33 index c573a5951c..1b9b7238ab 100644 --- a/RELEASE-NOTES-1.33 +++ b/RELEASE-NOTES-1.33 @@ -26,6 +26,11 @@ production. * (T199334) $wgTagStatisticsNewTable — This temporary setting, added in MediaWiki 1.32, has now been removed. When loading Special:Tags, MediaWiki will now always use the `change_tag_def` instead of the `change_tag` table. +* MediaWiki now always tidies user output, and most related + configuration has been removed. Thus $wgUseTidy, $wgTidyBin, + $wgTidyConf, $wgTidyOpts, $wgTidyInternal, and $wgDebugTidy, all + deprecated since 1.26, have now all been removed. The $wgTidyConfig + setting remains only for Remex experimental features or debugging. * … === New features in 1.33 === @@ -92,6 +97,9 @@ because of Phabricator reports. * ParserOptions defaults 'tidy' to true now, since the untidy modes of the parser are being deprecated and ParserOptions::getCanonicalOverrides() has always been true at any rate. +* Support for disabling tidy and external tidy implementations has been removed. + This was deprecated in 1.32. The pure PHP Remex tidy implementation is now + used and no configuration is necessary. * A number of deprecated methods for API documentation, intended for overriding by extensions, are no longer called by MediaWiki, and will emit deprecation notices if your extension attempts to use them: diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 15cb5bf3e3..2104a78816 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -4281,74 +4281,24 @@ $wgAllowImageTag = false; /** * Configuration for HTML postprocessing tool. Set this to a configuration * array to enable an external tool. By default, we now use the RemexHtml - * library; historically, Dave Raggett's "HTML Tidy" was typically used. - * See https://www.w3.org/People/Raggett/tidy/ - * - * Setting this to null is deprecated. 
- * - * If this is null and $wgUseTidy is true, the deprecated configuration - * parameters will be used instead. - * - * If this is null and $wgUseTidy is false, a pure PHP fallback will be used. - * (Equivalent to setting `$wgTidyConfig['driver'] = 'disabled'`.) - * - * Keys are: - * - driver: May be: - * - RemexHtml: Use the RemexHtml library in PHP - * - RaggettInternalHHVM: Use the limited-functionality HHVM extension - * Deprecated since 1.32. - * - RaggettInternalPHP: Use the PECL extension - * Deprecated since 1.32. - * - RaggettExternal: Shell out to an external binary (tidyBin) - * Deprecated since 1.32. - * - disabled: Disable tidy pass and use a hacky pure PHP workaround - * (this is what setting $wgUseTidy to false used to do) - * Deprecated since 1.32. - * - * - tidyConfigFile: Path to configuration file for any of the Raggett drivers - * - debugComment: True to add a comment to the output with warning messages - * - tidyBin: For RaggettExternal, the path to the tidy binary. - * - tidyCommandLine: For RaggettExternal, additional command line options. + * library; historically, other postprocessors were used. + * + * Setting this to null will use default settings. + * + * Keys include: + * - driver: formerly used to select a postprocessor; now ignored. + * - treeMutationTrace: a boolean to turn on Remex tracing + * - serializerTrace: a boolean to turn on Remex tracing + * - mungerTrace: a boolean to turn on Remex tracing + * - pwrap: whether
<p>
wrapping should be done (default true) + * + * See includes/tidy/RemexDriver.php for detail on configuration. + * + * Overriding the default configuration is strongly discouraged in + * production. */ $wgTidyConfig = [ 'driver' => 'RemexHtml' ]; -/** - * Set this to true to use the deprecated tidy configuration parameters. - * @deprecated since 1.26, use $wgTidyConfig['driver'] = 'disabled' - */ -$wgUseTidy = false; - -/** - * The path to the tidy binary. - * @deprecated since 1.26, use $wgTidyConfig['tidyBin'] - */ -$wgTidyBin = 'tidy'; - -/** - * The path to the tidy config file - * @deprecated since 1.26, use $wgTidyConfig['tidyConfigFile'] - */ -$wgTidyConf = $IP . '/includes/tidy/tidy.conf'; - -/** - * The command line options to the tidy binary - * @deprecated since 1.26, use $wgTidyConfig['tidyCommandLine'] - */ -$wgTidyOpts = ''; - -/** - * Set this to true to use the tidy extension - * @deprecated since 1.26, use $wgTidyConfig['driver'] - */ -$wgTidyInternal = extension_loaded( 'tidy' ); - -/** - * Put tidy warnings in HTML comments - * Only works for internal tidy. - * @deprecated since 1.26, use $wgTidyConfig['debugComment'] - */ -$wgDebugTidy = false; - /** * Allow raw, unchecked HTML in "..." sections. * THIS IS VERY DANGEROUS on a publicly editable site, so USE wgGroupPermissions diff --git a/includes/parser/MWTidy.php b/includes/parser/MWTidy.php index 8881786067..fce67fdd1e 100644 --- a/includes/parser/MWTidy.php +++ b/includes/parser/MWTidy.php @@ -22,11 +22,7 @@ */ /** - * Class to interact with HTML tidy - * - * Either the external tidy program or the in-process tidy extension - * will be used depending on availability. Override the default - * $wgTidyInternal setting to disable the internal if it's not working. + * Class to interact with and configure Remex tidy * * @ingroup Parser */ @@ -34,7 +30,7 @@ class MWTidy { private static $instance; /** - * Interface with html tidy. + * Interface with Remex tidy. 
* If tidy isn't able to correct the markup, the original will be * returned in all its glory with a warning comment appended. * @@ -63,34 +59,9 @@ class MWTidy { * @return bool|\MediaWiki\Tidy\TidyDriverBase */ public static function singleton() { - global $wgUseTidy, $wgTidyInternal, $wgTidyConf, $wgDebugTidy, $wgTidyConfig, - $wgTidyBin, $wgTidyOpts; - + global $wgTidyConfig; if ( self::$instance === null ) { - if ( $wgTidyConfig !== null ) { - $config = $wgTidyConfig; - } elseif ( $wgUseTidy ) { - // b/c configuration - wfDeprecated( '$wgUseTidy', '1.26' ); - $config = [ - 'tidyConfigFile' => $wgTidyConf, - 'debugComment' => $wgDebugTidy, - 'tidyBin' => $wgTidyBin, - 'tidyCommandLine' => $wgTidyOpts ]; - if ( $wgTidyInternal ) { - if ( wfIsHHVM() ) { - $config['driver'] = 'RaggettInternalHHVM'; - } else { - $config['driver'] = 'RaggettInternalPHP'; - } - } else { - $config['driver'] = 'RaggettExternal'; - } - } else { - wfDeprecated( '$wgTidyConfig = null and $wgUseTidy = false', '1.26' ); - return false; - } - self::$instance = self::factory( $config ); + self::$instance = self::factory( $wgTidyConfig ); } return self::$instance; } @@ -98,38 +69,21 @@ class MWTidy { /** * Create a new Tidy driver object from configuration. 
* @see $wgTidyConfig - * @param array $config + * @param array|null $config Optional since 1.33 * @return bool|\MediaWiki\Tidy\TidyDriverBase * @throws MWException */ - public static function factory( array $config ) { - switch ( $config['driver'] ) { - case 'RaggettInternalHHVM': - $instance = new MediaWiki\Tidy\RaggettInternalHHVM( $config ); - break; - case 'RaggettInternalPHP': - $instance = new MediaWiki\Tidy\RaggettInternalPHP( $config ); - break; - case 'RaggettExternal': - $instance = new MediaWiki\Tidy\RaggettExternal( $config ); - break; - case 'RemexHtml': - $instance = new MediaWiki\Tidy\RemexDriver( $config ); - break; - case 'disabled': - wfDeprecated( '"disabled" tidy driver', '1.32' ); - return false; - default: - throw new MWException( "Invalid tidy driver: \"{$config['driver']}\"" ); - } - return $instance; + public static function factory( array $config = null ) { + return new MediaWiki\Tidy\RemexDriver( $config ?? [] ); } /** * Set the driver to be used. This is for testing. * @param MediaWiki\Tidy\TidyDriverBase|false|null $instance + * @deprecated Since 1.33 */ public static function setInstance( $instance ) { + wfDeprecated( __METHOD__, '1.33' ); self::$instance = $instance; } diff --git a/includes/tidy/RaggettBase.php b/includes/tidy/RaggettBase.php deleted file mode 100644 index 878099ff7d..0000000000 --- a/includes/tidy/RaggettBase.php +++ /dev/null @@ -1,60 +0,0 @@ -getWrapped( $text ); - - $retVal = null; - $correctedtext = $this->cleanWrapped( $wrappedtext, false, $retVal ); - - if ( $retVal < 0 ) { - wfDebug( "Possible tidy configuration error!\n" ); - return $text . "\n\n"; - } elseif ( is_null( $correctedtext ) ) { - wfDebug( "Tidy error detected!\n" ); - return $text . 
"\n\n"; - } - - $correctedtext = $wrapper->postprocess( $correctedtext ); // restore any hidden tokens - - return $correctedtext; - } - - public function validate( $text, &$errorStr ) { - $retval = 0; - $errorStr = $this->cleanWrapped( $text, true, $retval ); - return ( $retval < 0 && $errorStr == '' ) || $retval == 0; - } - - /** - * Perform a clean/repair operation - * @param string $text HTML to check - * @param bool $stderr Whether to read result from STDERR rather than STDOUT - * @param int|null &$retval Exit code (-1 on internal error) - * @return null|string - * @throws MWException - */ - abstract protected function cleanWrapped( $text, $stderr = false, &$retval = null ); -} diff --git a/includes/tidy/RaggettExternal.php b/includes/tidy/RaggettExternal.php deleted file mode 100644 index 0b485c7cc6..0000000000 --- a/includes/tidy/RaggettExternal.php +++ /dev/null @@ -1,76 +0,0 @@ - [ 'pipe', 'r' ], - 1 => [ 'file', wfGetNull(), 'a' ], - 2 => [ 'pipe', 'w' ] - ]; - } else { - $descriptorspec = [ - 0 => [ 'pipe', 'r' ], - 1 => [ 'pipe', 'w' ], - 2 => [ 'file', wfGetNull(), 'a' ] - ]; - } - - $readpipe = $stderr ? 2 : 1; - $pipes = []; - - $process = proc_open( - "{$this->config['tidyBin']} -config {$this->config['tidyConfigFile']} " . - $this->config['tidyCommandLine'] . $opts, $descriptorspec, $pipes ); - - // NOTE: At least on linux, the process will be created even if tidy is not installed. - // This means that missing tidy will be treated as a validation failure. - - if ( is_resource( $process ) ) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. 
Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite( $pipes[0], $text ); - fclose( $pipes[0] ); - while ( !feof( $pipes[$readpipe] ) ) { - $cleansource .= fgets( $pipes[$readpipe], 1024 ); - } - fclose( $pipes[$readpipe] ); - $retval = proc_close( $process ); - } else { - wfWarn( "Unable to start external tidy process" ); - $retval = -1; - } - - if ( !$stderr && $cleansource == '' && $text != '' ) { - // Some kind of error happened, so we couldn't get the corrected text. - // Just give up; we'll use the source text and append a warning. - $cleansource = null; - } - - return $cleansource; - } - - public function supportsValidate() { - return true; - } -} diff --git a/includes/tidy/RaggettInternalHHVM.php b/includes/tidy/RaggettInternalHHVM.php deleted file mode 100644 index 1681dc45e4..0000000000 --- a/includes/tidy/RaggettInternalHHVM.php +++ /dev/null @@ -1,32 +0,0 @@ -config['tidyConfigFile'], 'utf8' ); - if ( $cleansource === false ) { - $cleansource = null; - $retval = -1; - } else { - $retval = 0; - } - - return $cleansource; - } -} diff --git a/includes/tidy/RaggettInternalPHP.php b/includes/tidy/RaggettInternalPHP.php deleted file mode 100644 index c1050cc222..0000000000 --- a/includes/tidy/RaggettInternalPHP.php +++ /dev/null @@ -1,55 +0,0 @@ -parseString( $text, $this->config['tidyConfigFile'], 'utf8' ); - - if ( $stderr ) { - $retval = $tidy->getStatus(); - return $tidy->errorBuffer; - } - - $tidy->cleanRepair(); - $retval = $tidy->getStatus(); - if ( $retval == 2 ) { - // 2 is magic number for fatal error - // https://secure.php.net/manual/en/tidy.getstatus.php - $cleansource = null; - } else { - $cleansource = tidy_get_output( $tidy ); - if ( !empty( $this->config['debugComment'] ) && $retval > 0 ) { - $cleansource .= "', '-->', $tidy->errorBuffer ) . 
- "\n-->"; - } - } - - return $cleansource; - } - - public function supportsValidate() { - return true; - } -} diff --git a/includes/tidy/RaggettWrapper.php b/includes/tidy/RaggettWrapper.php deleted file mode 100644 index 855282d342..0000000000 --- a/includes/tidy/RaggettWrapper.php +++ /dev/null @@ -1,101 +0,0 @@ -mTokens = []; - $this->mMarkerIndex = 0; - - // Replace elements with placeholders - $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, - [ $this, 'replaceCallback' ], $text ); - // ...and markers - $wrappedtext = preg_replace_callback( '/\<\\/?mw:toc\>/', - [ $this, 'replaceCallback' ], $wrappedtext ); - // ... and tags - $wrappedtext = preg_replace_callback( '/\/s', - [ $this, 'replaceCallback' ], $wrappedtext ); - // Modify inline Microdata and elements so they say and so - // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config - $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', ' tags, but those aren't empty. - $wrappedtext = preg_replace_callback( '!]*)>(.*?)!s', function ( $m ) { - return '' - . $this->replaceCallback( [ $m[2] ] ) - . ''; - }, $wrappedtext ); - - // Preserve empty li elements (T49673) by abusing Tidy's datafld hack - // The whitespace class is as in TY_(InitMap) - $wrappedtext = preg_replace( "!

<li>([ \r\n\t\f]*)</li>!", - '<li datafld="" class="mw-empty-elt">\1</li>', $wrappedtext ); - - // Wrap the whole thing in a doctype and body for Tidy. - $wrappedtext = '' . - 'test' . $wrappedtext . ''; - - return $wrappedtext; - } - - /** - * @param array $m - * @return string - */ - private function replaceCallback( array $m ) { - $marker = Parser::MARKER_PREFIX . "-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX; - $this->mMarkerIndex++; - $this->mTokens[$marker] = $m[0]; - return $marker; - } - - /** - * @param string $text - * @return string - */ - public function postprocess( $text ) { - // Revert back to <{link,meta,style}> - $text = preg_replace( '!]*?)(/{0,1}>)!', '<$1$2$3', $text ); - $text = preg_replace( '!<(/?)html-(style)([^>]*)>!', '<$1$2$3>', $text ); - - // Remove datafld - $text = str_replace( '
  • mTokens ); - - return $text; - } - -} diff --git a/includes/tidy/tidy.conf b/includes/tidy/tidy.conf deleted file mode 100644 index d4a3199367..0000000000 --- a/includes/tidy/tidy.conf +++ /dev/null @@ -1,24 +0,0 @@ -# html tidy (http://tidy.sf.net) configuration -# tidy - validate, correct, and pretty-print HTML files -# see: man 1 tidy, http://tidy.sourceforge.net/docs/quickref.html - -show-body-only: yes -force-output: yes -tidy-mark: no -wrap: 0 -wrap-attributes: no -literal-attributes: yes -output-xhtml: yes -numeric-entities: yes -enclose-text: yes -enclose-block-text: yes -quiet: yes -quote-nbsp: yes -fix-backslash: no -fix-uri: no -# Don't strip html5 elements we support -# html-{meta,link} is a hack we use to prevent Tidy from stripping and used in the body for Microdata -new-empty-tags: html-meta, html-link, wbr, source, track -new-inline-tags: video, audio, bdi, data, time, mark -# html-style is a hack we use to prevent pre-HTML5 versions of Tidy from stripping