From 98c2703f81083125a76c84a2721305dab6225907 Mon Sep 17 00:00:00 2001 From: Ori Livneh Date: Mon, 1 Dec 2014 17:10:52 -0800 Subject: [PATCH] Simplify MWTidy * Make the internal MWTidy::*clean() functions always return an array of two elements: the output buffer and the error buffer. * Make MWTidy::externalTidy() always read both stdout and stderr. We can read stderr after stdout because tidy.c produces output in the same order. * Remove the $stderr parameter from the private MWTidy::*clean() methods, since error output is always returned. * Merge MWTidy::phpClean and MWTidy::hhvmClean, since the difference between them is now small enough that splitting them up is not warranted. * On HHVM, MWTidy::internalTidy() always returns an empty string for the error buffer. Change-Id: I178b42d6ebdd1a5b9bd5921eb093a6c5014ffa49 --- includes/parser/MWTidy.php | 140 ++++++++++++++----------------------- 1 file changed, 51 insertions(+), 89 deletions(-) diff --git a/includes/parser/MWTidy.php b/includes/parser/MWTidy.php index 3cb35f7fc9..7b699d21e5 100644 --- a/includes/parser/MWTidy.php +++ b/includes/parser/MWTidy.php @@ -131,12 +131,12 @@ class MWTidy { $wrappedtext = $wrapper->getWrapped( $text ); $retVal = null; - $correctedtext = self::clean( $wrappedtext, false, $retVal ); + list( $correctedtext, $errors ) = self::clean( $wrappedtext, $retVal ); if ( $retVal < 0 ) { wfDebug( "Possible tidy configuration error!\n" ); return $text . "\n\n"; - } elseif ( is_null( $correctedtext ) ) { + } elseif ( $correctedtext === '' && $text !== '' ) { wfDebug( "Tidy error detected!\n" ); return $text . "\n\n"; } @@ -155,31 +155,23 @@ class MWTidy { */ public static function checkErrors( $text, &$errorStr = null ) { $retval = 0; - $errorStr = self::clean( $text, true, $retval ); + list( $outputStr, $errorStr ) = self::clean( $text, $retval ); return ( $retval < 0 && $errorStr == '' ) || $retval == 0; } /** * Perform a clean/repair operation * @param string $text HTML to check - * @param bool $stderr Whether to read result from STDERR rather than STDOUT * @param int &$retval Exit code (-1 on internal error) * @return string|null */ - private static function clean( $text, $stderr = false, &$retval = null ) { + private static function clean( $text, &$retval = null ) { global $wgTidyInternal; if ( $wgTidyInternal ) { - if ( wfIsHHVM() ) { - if ( $stderr ) { - throw new MWException( __METHOD__ . ": error text return from HHVM tidy is not supported" ); - } - return self::hhvmClean( $text, $retval ); - } else { - return self::phpClean( $text, $stderr, $retval ); - } + return self::internalClean( $text, $retval ); } else { - return self::externalClean( $text, $stderr, $retval ); + return self::externalClean( $text, $retval ); } } @@ -188,32 +180,23 @@ class MWTidy { * Also called in OutputHandler.php for full page validation * * @param string $text HTML to check - * @param bool $stderr Whether to read result from STDERR rather than STDOUT * @param int &$retval Exit code (-1 on internal error) * @return string|null */ - private static function externalClean( $text, $stderr = false, &$retval = null ) { + private static function externalClean( $text, &$retval = null ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; wfProfileIn( __METHOD__ ); - $cleansource = ''; $opts = ' -utf8'; - if ( $stderr ) { - $descriptorspec = array( - 0 => array( 'pipe', 'r' ), - 1 => array( 'file', wfGetNull(), 'a' ), - 2 => array( 'pipe', 'w' ) - ); - } else { - $descriptorspec = array( - 0 => array( 'pipe', 'r' ), - 1 => array( 'pipe', 'w' ), - 2 => array( 'file', wfGetNull(), 'a' ) - ); - } + $descriptorspec = array( + 0 => array( 'pipe', 'r' ), + 1 => array( 'pipe', 'w' ), + 2 => array( 'pipe', 'w' ), + ); - $readpipe = $stderr ? 2 : 1; + $outputBuffer = ''; + $errorBuffer = ''; $pipes = array(); $process = proc_open( @@ -230,24 +213,25 @@ class MWTidy { // for tidyParseStdin and tidySaveStdout in console/tidy.c fwrite( $pipes[0], $text ); fclose( $pipes[0] ); - while ( !feof( $pipes[$readpipe] ) ) { - $cleansource .= fgets( $pipes[$readpipe], 1024 ); + + while ( !feof( $pipes[1] ) ) { + $outputBuffer .= fgets( $pipes[1], 1024 ); } - fclose( $pipes[$readpipe] ); + fclose( $pipes[1] ); + + while ( !feof( $pipes[2] ) ) { + $errorBuffer .= fgets( $pipes[2], 1024 ); + } + fclose( $pipes[2] ); + $retval = proc_close( $process ); } else { wfWarn( "Unable to start external tidy process" ); $retval = -1; } - if ( !$stderr && $cleansource == '' && $text != '' ) { - // Some kind of error happened, so we couldn't get the corrected text. - // Just give up; we'll use the source text and append a warning. - $cleansource = null; - } - wfProfileOut( __METHOD__ ); - return $cleansource; + return array( $outputBuffer, $errorBuffer ); } /** @@ -255,11 +239,10 @@ class MWTidy { * saving the overhead of spawning a new process. * * @param string $text HTML to check - * @param bool $stderr Whether to read result from error status instead of output * @param int &$retval Exit code (-1 on internal error) * @return string|null */ - private static function phpClean( $text, $stderr = false, &$retval = null ) { + private static function internalClean( $text, &$retval = null ) { global $wgTidyConf, $wgDebugTidy; wfProfileIn( __METHOD__ ); @@ -271,57 +254,36 @@ class MWTidy { return null; } - $tidy = new tidy; - $tidy->parseString( $text, $wgTidyConf, 'utf8' ); - - if ( $stderr ) { - $retval = $tidy->getStatus(); - - wfProfileOut( __METHOD__ ); - return $tidy->errorBuffer; - } - - $tidy->cleanRepair(); - $retval = $tidy->getStatus(); - if ( $retval == 2 ) { - // 2 is magic number for fatal error - // http://www.php.net/manual/en/function.tidy-get-status.php - $cleansource = null; + $outputBuffer = ''; + $errorBuffer = ''; + + if ( wfIsHHVM() ) { + // Use the tidy extension for HHVM from + // https://github.com/wikimedia/mediawiki-php-tidy + // + // This currently does not support the object-oriented interface, but + // tidy_repair_string() can be used for the most common tasks. + $result = tidy_repair_string( $text, $wgTidyConf, 'utf8' ); + $outputBuffer .= $result; + $retval = $result === false ? -1 : 0; } else { - $cleansource = tidy_get_output( $tidy ); - if ( $wgDebugTidy && $retval > 0 ) { - $cleansource .= "', '-->', $tidy->errorBuffer ) . - "\n-->"; + $tidy = new tidy; + $tidy->parseString( $text, $wgTidyConf, 'utf8' ); + $tidy->cleanRepair(); + $retval = $tidy->getStatus(); + $outputBuffer .= tidy_get_output( $tidy ); + if ( $retval > 0 ) { + $errorBuffer .= $tidy->errorBuffer; } } - wfProfileOut( __METHOD__ ); - return $cleansource; - } - - /** - * Use the tidy extension for HHVM from - * https://github.com/wikimedia/mediawiki-php-tidy - * - * This currently does not support the object-oriented interface, but - * tidy_repair_string() can be used for the most common tasks. - * - * @param string $text HTML to check - * @param int &$retval Exit code (-1 on internal error) - * @return string|null - */ - private static function hhvmClean( $text, &$retval ) { - global $wgTidyConf; - wfProfileIn( __METHOD__ ); - $cleansource = tidy_repair_string( $text, $wgTidyConf, 'utf8' ); - if ( $cleansource === false ) { - $cleansource = null; - $retval = -1; - } else { - $retval = 0; + if ( $wgDebugTidy && $errorBuffer && $retval > 0 ) { + $outputBuffer .= "', '-->', $tidy->errorBuffer ) . + "\n-->"; } + wfProfileOut( __METHOD__ ); - return $cleansource; + return array( $outputBuffer, $errorBuffer ); } } -- 2.20.1