From 906e978e91510b231a4b61d96adba4b685c355ef Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Thu, 24 Jan 2008 04:29:56 +0000 Subject: [PATCH] * Factored out PPD "part" handling into its own class. Verified with differential fuzz test. * Stabilise timestamps generated by the parser to avoid diff test false positives * Fixed msgnw bug. Use RECOVER_ORIG. * Fixed editintro bug. Cloning the parser in MessageCache has some side-effects that need to be corrected. * Fixed typo in Parser_DiffTest.php * General improvements to preprocessorFuzzTest.php * Fixed breakage of XML output feature in Special:ExpandTemplates --- includes/Parser.php | 106 +++++---- includes/ParserOptions.php | 9 + includes/Parser_DiffTest.php | 2 +- includes/Preprocessor_DOM.php | 314 +++++++++++++++++---------- maintenance/preprocessorFuzzTest.php | 64 +++++- 5 files changed, 328 insertions(+), 167 deletions(-) diff --git a/includes/Parser.php b/includes/Parser.php index 209b712c0d..d5354cb21a 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -89,7 +89,7 @@ class Parser # Persistent: var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix, - $mExtLinkBracketedRegex, $mPreprocessor, $mDefaultStripList; + $mExtLinkBracketedRegex, $mPreprocessor, $mDefaultStripList, $mVarCache, $mConf; # Cleared with clearState(): @@ -118,6 +118,7 @@ class Parser * @public */ function __construct( $conf = array() ) { + $this->mConf = $conf; $this->mTagHooks = array(); $this->mTransparentTagHooks = array(); $this->mFunctionHooks = array(); @@ -126,6 +127,7 @@ class Parser $this->mMarkerSuffix = "-QINU\x7f"; $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; + $this->mVarCache = array(); if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; } else { @@ -237,6 +239,7 @@ class Parser * the behaviour of in a link. */ #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); + # Changed to \x7f to allow XML double-parsing -- TS $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString(); # Clear these on every parse, bug 4549 @@ -252,6 +255,11 @@ class Parser $this->mDefaultSort = false; $this->mHeadings = array(); + # Fix cloning + if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) { + $this->mPreprocessor = null; + } + wfRunHooks( 'ParserClearState', array( &$this ) ); wfProfileOut( __METHOD__ ); } @@ -2374,14 +2382,13 @@ class Parser * Some of these require message or data lookups and can be * expensive to check many times. */ - static $varCache = array(); - if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$varCache ) ) ) { - if ( isset( $varCache[$index] ) ) { - return $varCache[$index]; + if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) { + if ( isset( $this->mVarCache[$index] ) ) { + return $this->mVarCache[$index]; } } - $ts = time(); + $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() ); wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); # Use the time zone @@ -2408,29 +2415,29 @@ class Parser switch ( $index ) { case 'currentmonth': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) ); case 'currentmonthname': - return $varCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) ); case 'currentmonthnamegen': - return $varCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) ); case 'currentmonthabbrev': - return $varCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); case 'currentday': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) ); case 'currentday2': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) ); case 'localmonth': - return $varCache[$index] = $wgContLang->formatNum( $localMonth ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localMonth ); case 'localmonthname': - return $varCache[$index] = $wgContLang->getMonthName( $localMonthName ); + return $this->mVarCache[$index] = $wgContLang->getMonthName( $localMonthName ); case 'localmonthnamegen': - return $varCache[$index] = $wgContLang->getMonthNameGen( $localMonthName ); + return $this->mVarCache[$index] = $wgContLang->getMonthNameGen( $localMonthName ); case 'localmonthabbrev': - return $varCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName ); + return $this->mVarCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName ); case 'localday': - return $varCache[$index] = $wgContLang->formatNum( $localDay ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localDay ); case 'localday2': - return $varCache[$index] = $wgContLang->formatNum( $localDay2 ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localDay2 ); case 'pagename': return wfEscapeWikiText( $this->mTitle->getText() ); case 'pagenamee': @@ -2516,51 +2523,51 @@ class Parser case 'subjectspacee': return( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); case 'currentdayname': - return $varCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); + return $this->mVarCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); case 'currentyear': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true ); case 'currenttime': - return $varCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false ); + return $this->mVarCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false ); case 'currenthour': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true ); case 'currentweek': // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to // int to remove the padding - return $varCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) ); case 'currentdow': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) ); case 'localdayname': - return $varCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 ); + return $this->mVarCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 ); case 'localyear': - return $varCache[$index] = $wgContLang->formatNum( $localYear, true ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localYear, true ); case 'localtime': - return $varCache[$index] = $wgContLang->time( $localTimestamp, false, false ); + return $this->mVarCache[$index] = $wgContLang->time( $localTimestamp, false, false ); case 'localhour': - return $varCache[$index] = $wgContLang->formatNum( $localHour, true ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localHour, true ); case 'localweek': // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to // int to remove the padding - return $varCache[$index] = $wgContLang->formatNum( (int)$localWeek ); + return $this->mVarCache[$index] = $wgContLang->formatNum( (int)$localWeek ); case 'localdow': - return $varCache[$index] = $wgContLang->formatNum( $localDayOfWeek ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localDayOfWeek ); case 'numberofarticles': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::articles() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::articles() ); case 'numberoffiles': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::images() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::images() ); case 'numberofusers': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::users() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::users() ); case 'numberofpages': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); case 'numberofadmins': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); case 'numberofedits': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); case 'currenttimestamp': - return $varCache[$index] = wfTimestampNow(); + return $this->mVarCache[$index] = wfTimestamp( TS_MW, $ts ); case 'localtimestamp': - return $varCache[$index] = $localTimestamp; + return $this->mVarCache[$index] = $localTimestamp; case 'currentversion': - return $varCache[$index] = SpecialVersion::getVersion(); + return $this->mVarCache[$index] = SpecialVersion::getVersion(); case 'sitename': return $wgSitename; case 'server': @@ -2576,7 +2583,7 @@ class Parser return $wgContLanguageCode; default: $ret = null; - if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) ) + if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret ) ) ) return $ret; else return null; @@ -2936,7 +2943,9 @@ class Parser # Clean up argument array $newFrame = $frame->newChild( $args, $title ); - if ( $titleText !== false && $newFrame->isEmpty() ) { + if ( $nowiki ) { + $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG ); + } elseif ( $titleText !== false && $newFrame->isEmpty() ) { # Expansion is eligible for the empty-frame cache if ( isset( $this->mTplExpandCache[$titleText] ) ) { $text = $this->mTplExpandCache[$titleText]; @@ -2949,6 +2958,10 @@ class Parser $text = $newFrame->expand( $text ); } } + if ( $isLocalObj && $nowiki ) { + $text = $frame->expand( $text, PPFrame::RECOVER_ORIG ); + $isLocalObj = false; + } # Replace raw HTML by a placeholder # Add a blank line preceding, to prevent it from mucking up @@ -3635,7 +3648,7 @@ class Parser $oldtz = getenv( 'TZ' ); putenv( 'TZ='.$wgLocaltimezone ); } - $d = $wgContLang->timeanddate( date( 'YmdHis' ), false, false) . + $d = $wgContLang->timeanddate( $this->mOptions->getTimestamp(), false, false) . ' (' . date( 'T' ) . ')'; if ( isset( $wgLocaltimezone ) ) { putenv( 'TZ='.$oldtz ); @@ -4776,6 +4789,9 @@ class Parser */ function testSrvus( $text, $title, $options, $outputType = self::OT_HTML ) { $this->clearState(); + if ( ! ( $title instanceof Title ) ) { + $title = Title::newFromText( $title ); + } $this->mTitle = $title; $this->mOptions = $options; $this->setOutputType( $outputType ); @@ -4787,10 +4803,16 @@ class Parser function testPst( $text, $title, $options ) { global $wgUser; + if ( ! ( $title instanceof Title ) ) { + $title = Title::newFromText( $title ); + } return $this->preSaveTransform( $text, $title, $wgUser, $options ); } function testPreprocess( $text, $title, $options ) { + if ( ! ( $title instanceof Title ) ) { + $title = Title::newFromText( $title ); + } return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); } } diff --git a/includes/ParserOptions.php b/includes/ParserOptions.php index 5bab3ebc7d..996bba2171 100644 --- a/includes/ParserOptions.php +++ b/includes/ParserOptions.php @@ -26,6 +26,7 @@ class ParserOptions var $mRemoveComments; # Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS var $mTemplateCallback; # Callback for template fetching var $mEnableLimitReport; # Enable limit report in an HTML comment on output + var $mTimestamp; # Timestamp used for {{CURRENTDAY}} etc. var $mUser; # Stored user object, just used to initialise the skin @@ -60,6 +61,13 @@ class ParserOptions return $this->mDateFormat; } + function getTimestamp() { + if ( !isset( $this->mTimestamp ) ) { + $this->mTimestamp = wfTimestampNow(); + } + return $this->mTimestamp; + } + function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); } function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); } function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); } @@ -78,6 +86,7 @@ class ParserOptions function setRemoveComments( $x ) { return wfSetVar( $this->mRemoveComments, $x ); } function setTemplateCallback( $x ) { return wfSetVar( $this->mTemplateCallback, $x ); } function enableLimitReport( $x = true ) { return wfSetVar( $this->mEnableLimitReport, $x ); } + function setTimestamp( $x ) { return wfSetVar( $this->mTimestamp, $x ); } function __construct( $user = null ) { $this->initialiseFromUser( $user ); diff --git a/includes/Parser_DiffTest.php b/includes/Parser_DiffTest.php index e4a9362302..754434a952 100644 --- a/includes/Parser_DiffTest.php +++ b/includes/Parser_DiffTest.php @@ -18,7 +18,7 @@ class Parser_DiffTest foreach ( $this->conf['parsers'] as $i => $parserConf ) { if ( !is_array( $parserConf ) ) { $class = $parserConf; - $parserconf = array( 'class' => $parserConf ); + $parserConf = array( 'class' => $parserConf ); } else { $class = $parserConf['class']; } diff --git a/includes/Preprocessor_DOM.php b/includes/Preprocessor_DOM.php index d7701c0913..3a712b0109 100644 --- a/includes/Preprocessor_DOM.php +++ b/includes/Preprocessor_DOM.php @@ -1,16 +1,37 @@ parser = $parser; + $mem = ini_get( 'memory_limit' ); + $this->memoryLimit = false; + if ( strval( $mem ) !== '' && $mem != -1 ) { + if ( preg_match( '/^\d+$/', $mem ) ) { + $this->memoryLimit = $mem; + } elseif ( preg_match( '/^(\d+)M$/i', $mem, $m ) ) { + $this->memoryLimit = $m[1] * 1048576; + } + } } function newFrame() { return new PPFrame_DOM( $this ); } + function memCheck() { + if ( $this->memoryLimit === false ) { + return; + } + $usage = memory_get_usage(); + if ( $usage > $this->memoryLimit * 0.9 ) { + $limit = intval( $this->memoryLimit * 0.9 / 1048576 + 0.5 ); + throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" ); + } + return $usage <= $this->memoryLimit * 0.8; + } + /** * Preprocess some wikitext and return the document tree. * This is the ghost of Parser::replace_variables(). @@ -78,11 +99,11 @@ class Preprocessor_DOM implements Preprocessor { $stack = new PPDStack; - $searchBase = '[{<'; + $searchBase = '[{<'; #} $revText = strrev( $text ); // For fast reverse searches $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start - $accum =& $stack->getAccum(); # Current text accumulator + $accum =& $stack->getAccum(); # Current accumulator $accum = ''; $findEquals = false; # True to find equals signs in arguments $findPipe = false; # True to take notice of pipe characters @@ -93,6 +114,8 @@ class Preprocessor_DOM implements Preprocessor { $fakeLineStart = true; # Do a line-start run without outputting an LF character while ( true ) { + if ( ! ($i % 10) ) $this->memCheck(); + if ( $findOnlyinclude ) { // Ignore all input up to the next $startPos = strpos( $text, '', $i ); @@ -241,6 +264,17 @@ class Preprocessor_DOM implements Preprocessor { $endPos += 2; } + /* + if ( $stack->top ) { + if ( $stack->top->commentEndPos !== false && $stack->top->commentEndPos == $wsStart ) { + // Comments abutting, no change in visual end + $stack->top->commentEndPos = $wsEnd; + } else { + $stack->top->visualEndPos = $wsStart; + $stack->top->commentEndPos = $wsEnd; + } + } + */ $i = $endPos + 1; $inner = substr( $text, $startPos, $endPos - $startPos + 1 ); $accum .= '' . htmlspecialchars( $inner ) . ''; @@ -326,7 +360,7 @@ class Preprocessor_DOM implements Preprocessor { $piece = array( 'open' => "\n", 'close' => "\n", - 'parts' => array( str_repeat( '=', $count ) ), + 'parts' => array( new PPDPart( str_repeat( '=', $count ) ) ), 'startPos' => $i, 'count' => $count ); $stack->push( $piece ); @@ -395,8 +429,6 @@ class Preprocessor_DOM implements Preprocessor { 'open' => $curChar, 'close' => $rule['end'], 'count' => $count, - 'parts' => array( '' ), - 'eqpos' => array(), 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), ); @@ -444,14 +476,12 @@ class Preprocessor_DOM implements Preprocessor { $name = $rule['names'][$matchingCount]; if ( $name === null ) { // No element, just literal text - $element = str_repeat( $piece->open, $matchingCount ) . - implode( '|', $piece->parts ) . - str_repeat( $rule['end'], $matchingCount ); + $element = $piece->breakSyntax( $matchingCount ) . str_repeat( $rule['end'], $matchingCount ); } else { # Create XML element # Note: $parts is already XML, does not need to be encoded further $parts = $piece->parts; - $title = $parts[0]; + $title = $parts[0]->out; unset( $parts[0] ); # The invocation is at the start of the line if lineStart is set in @@ -466,13 +496,12 @@ class Preprocessor_DOM implements Preprocessor { $element .= "$title"; $argIndex = 1; foreach ( $parts as $partIndex => $part ) { - if ( isset( $piece->eqpos[$partIndex] ) ) { - $eqpos = $piece->eqpos[$partIndex]; - $argName = substr( $part, 0, $eqpos ); - $argValue = substr( $part, $eqpos + 1 ); + if ( isset( $part->eqpos ) ) { + $argName = substr( $part->out, 0, $part->eqpos ); + $argValue = substr( $part->out, $part->eqpos + 1 ); $element .= "$argName=$argValue"; } else { - $element .= "$part"; + $element .= "{$part->out}"; $argIndex++; } } @@ -488,9 +517,8 @@ class Preprocessor_DOM implements Preprocessor { # Re-add the old stack element if it still has unmatched opening characters remaining if ($matchingCount < $piece->count) { - $piece->parts = array( '' ); + $piece->parts = array( new PPDPart ); $piece->count -= $matchingCount; - $piece->eqpos = array(); # do we still qualify for any callback with remaining count? $names = $rules[$piece->open]['names']; $skippedBraces = 0; @@ -515,15 +543,14 @@ class Preprocessor_DOM implements Preprocessor { elseif ( $found == 'pipe' ) { $findEquals = true; // shortcut for getFlags() - $stack->top->addPart(); + $stack->addPart(); $accum =& $stack->getAccum(); ++$i; } elseif ( $found == 'equals' ) { $findEquals = false; // shortcut for getFlags() - $partsCount = count( $stack->top->parts ); - $stack->top->eqpos[$partsCount - 1] = strlen( $accum ); + $stack->getCurrentPart()->eqpos = strlen( $accum ); $accum .= '='; ++$i; } @@ -531,14 +558,10 @@ class Preprocessor_DOM implements Preprocessor { # Output any remaining unclosed brackets foreach ( $stack->stack as $piece ) { - if ( $piece->open == "\n" ) { - $stack->topAccum .= $piece->parts[0]; - } else { - $stack->topAccum .= str_repeat( $piece->open, $piece->count ) . implode( '|', $piece->parts ); - } + $stack->rootAccum .= $piece->breakSyntax(); } - $stack->topAccum .= ''; - $xml = $stack->topAccum; + $stack->rootAccum .= ''; + $xml = $stack->rootAccum; wfProfileOut( __METHOD__.'-makexml' ); wfProfileIn( __METHOD__.'-loadXML' ); @@ -561,6 +584,156 @@ class Preprocessor_DOM implements Preprocessor { } } +/** + * Stack class to help Preprocessor::preprocessToObj() + */ +class PPDStack { + var $stack, $rootAccum, $top; + var $out; + static $false = false; + + function __construct() { + $this->stack = array(); + $this->top = false; + $this->rootAccum = ''; + $this->accum =& $this->rootAccum; + } + + function count() { + return count( $this->stack ); + } + + function &getAccum() { + return $this->accum; + } + + function getCurrentPart() { + if ( $this->top === false ) { + return false; + } else { + return $this->top->getCurrentPart(); + } + } + + function push( $data ) { + if ( $data instanceof PPDStackElement ) { + $this->stack[] = $data; + } else { + $this->stack[] = new PPDStackElement( $data ); + } + $this->top = $this->stack[ count( $this->stack ) - 1 ]; + $this->accum =& $this->top->getAccum(); + } + + function pop() { + if ( !count( $this->stack ) ) { + throw new MWException( __METHOD__.': no elements remaining' ); + } + $temp = array_pop( $this->stack ); + + if ( count( $this->stack ) ) { + $this->top = $this->stack[ count( $this->stack ) - 1 ]; + $this->accum =& $this->top->getAccum(); + } else { + $this->top = self::$false; + $this->accum =& $this->rootAccum; + } + return $temp; + } + + function addPart( $s = '' ) { + $this->top->addPart( $s ); + $this->accum =& $this->top->getAccum(); + } + + function getFlags() { + if ( !count( $this->stack ) ) { + return array( + 'findEquals' => false, + 'findPipe' => false, + 'inHeading' => false, + ); + } else { + return $this->top->getFlags(); + } + } +} + +class PPDStackElement { + var $open, // Opening character (\n for heading) + $close, // Matching closing character + $count, // Number of opening characters found (number of "=" for heading) + $parts, // Array of PPDPart objects describing pipe-separated parts. + $lineStart; // True if the open char appeared at the start of the input line. Not set for headings. + + function __construct( $data = array() ) { + $this->parts = array( new PPDPart ); + + foreach ( $data as $name => $value ) { + $this->$name = $value; + } + } + + function &getAccum() { + return $this->parts[count($this->parts) - 1]->out; + } + + function addPart( $s = '' ) { + $this->parts[] = new PPDPart( $s ); + } + + function getCurrentPart() { + return $this->parts[count($this->parts) - 1]; + } + + function getFlags() { + $partCount = count( $this->parts ); + $findPipe = $this->open != "\n" && $this->open != '['; + return array( + 'findPipe' => $findPipe, + 'findEquals' => $findPipe && $partCount > 1 && !isset( $this->parts[$partCount - 1]->eqpos ), + 'inHeading' => $this->open == "\n", + ); + } + + /** + * Get the output string that would result if the close is not found. + */ + function breakSyntax( $openingCount = false ) { + if ( $this->open == "\n" ) { + $s = $this->parts[0]->out; + } else { + if ( $openingCount === false ) { + $openingCount = $this->count; + } + $s = str_repeat( $this->open, $openingCount ); + $first = true; + foreach ( $this->parts as $part ) { + if ( $first ) { + $first = false; + } else { + $s .= '|'; + } + $s .= $part->out; + } + } + return $s; + } +} + +class PPDPart { + var $out; // Output accumulator string + + // Optional member variables: + // eqpos Position of equals sign in output accumulator + // commentEnd Past-the-end input pointer for the last comment encountered + // visualEnd Past-the-end input pointer for the end of the accumulator minus comments + + function __construct( $out = '' ) { + $this->out = $out; + } +} + /** * An expansion frame, used as a context to expand the result of preprocessToDom() */ @@ -1037,91 +1210,6 @@ class PPTemplateFrame_DOM extends PPFrame_DOM { } } -/** - * Stack class to help Parser::preprocessToDom() - */ -class PPDStack { - var $stack, $topAccum, $top; - - function __construct() { - $this->stack = array(); - $this->topAccum = ''; - $this->top = false; - } - - function &getAccum() { - if ( count( $this->stack ) ) { - return $this->top->getAccum(); - } else { - return $this->topAccum; - } - } - - function push( $data ) { - if ( $data instanceof PPDStackElement ) { - $this->stack[] = $data; - } else { - $this->stack[] = new PPDStackElement( $data ); - } - $this->top =& $this->stack[ count( $this->stack ) - 1 ]; - } - - function pop() { - if ( !count( $this->stack ) ) { - throw new MWException( __METHOD__.': no elements remaining' ); - } - $temp = array_pop( $this->stack ); - if ( count( $this->stack ) ) { - $this->top =& $this->stack[ count( $this->stack ) - 1 ]; - } else { - $this->top = false; - } - } - - function getFlags() { - if ( !count( $this->stack ) ) { - return array( - 'findEquals' => false, - 'findPipe' => false, - 'inHeading' => false, - ); - } else { - return $this->top->getFlags(); - } - } -} - -class PPDStackElement { - var $open, $close, $count, $parts, $eqpos, $lineStart; - - function __construct( $data = array() ) { - $this->parts = array( '' ); - $this->eqpos = array(); - - foreach ( $data as $name => $value ) { - $this->$name = $value; - } - } - - function &getAccum() { - return $this->parts[count($this->parts) - 1]; - } - - function addPart( $s = '' ) { - $this->parts[] = $s; - } - - function getFlags() { - $partCount = count( $this->parts ); - $findPipe = $this->open != "\n" && $this->open != '['; - return array( - 'findPipe' => $findPipe, - 'findEquals' => $findPipe && $partCount > 1 && !isset( $this->eqpos[$partCount - 1] ), - 'inHeading' => $this->open == "\n", - ); - } -} - class PPNode_DOM implements PPNode { var $node; @@ -1143,7 +1231,7 @@ class PPNode_DOM implements PPNode { $s .= $node->ownerDocument->saveXML( $node ); } } else { - $s = $this->node->ownerDocument->saveXML( $node ); + $s = $this->node->ownerDocument->saveXML( $this->node ); } return $s; } diff --git a/maintenance/preprocessorFuzzTest.php b/maintenance/preprocessorFuzzTest.php index e3cd405ab8..d814c4fefb 100644 --- a/maintenance/preprocessorFuzzTest.php +++ b/maintenance/preprocessorFuzzTest.php @@ -6,7 +6,7 @@ $wgHooks['BeforeParserFetchTemplateAndtitle'][] = 'PPFuzzTester::templateHook'; class PPFuzzTester { var $hairs = array( - '[[', ']]', '{{', '}}', '{{{', '}}}', + '[[', ']]', '{{', '{{', '}}', '}}', '{{{', '}}}', '<', '>', '', '', '', '', '', "\n==", "==\n", @@ -23,6 +23,7 @@ class PPFuzzTester { var $maxTemplates = 5; //var $outputTypes = array( 'OT_HTML', 'OT_WIKI', 'OT_PREPROCESS' ); var $entryPoints = array( 'testSrvus', 'testPst', 'testPreprocess' ); + var $verbose = false; static $currentTest = false; function execute() { @@ -33,10 +34,14 @@ class PPFuzzTester { echo "Unable to create 'results' directory\n"; exit( 1 ); } - for ( $i = 0; true; $i++ ) { + $overallStart = microtime( true ); + $reportInterval = 1000; + for ( $i = 1; true; $i++ ) { + $t = -microtime( true ); try { self::$currentTest = new PPFuzzTest( $this ); self::$currentTest->execute(); + $passed = 'passed'; } catch ( MWException $e ) { $testReport = self::$currentTest->getReport(); $exceptionReport = $e->getText(); @@ -45,8 +50,30 @@ class PPFuzzTester { file_put_contents( "results/ppft-$hash.fail", "Input:\n$testReport\n\nException report:\n$exceptionReport\n" ); print "Test $hash failed\n"; + $passed = 'failed'; } - if ( $i % 1000 == 0 ) { + $t += microtime( true ); + + if ( $this->verbose ) { + printf( "Test $passed in %.3f seconds\n", $t ); + print self::$currentTest->getReport(); + } + + $reportMetric = ( microtime( true ) - $overallStart ) / $i * $reportInterval; + if ( $reportMetric > 25 ) { + if ( substr( $reportInterval, 0, 1 ) === '1' ) { + $reportInterval /= 2; + } else { + $reportInterval /= 5; + } + } elseif ( $reportMetric < 4 ) { + if ( substr( $reportInterval, 0, 1 ) === '1' ) { + $reportInterval *= 5; + } else { + $reportInterval *= 2; + } + } + if ( $i % $reportInterval == 0 ) { print "$i tests done\n"; /* $testReport = self::$currentTest->getReport(); @@ -54,10 +81,14 @@ class PPFuzzTester { file_put_contents( $filename, "Input:\n$testReport\n" );*/ } } + wfLogProfilingData(); } - function makeInputText() { - $length = mt_rand( $this->minLength, $this->maxLength ); + function makeInputText( $max = false ) { + if ( $max === false ) { + $max = $this->maxLength; + } + $length = mt_rand( $this->minLength, $max ); $s = ''; for ( $i = 0; $i < $length; $i++ ) { $hairIndex = mt_rand( 0, count( $this->hairs ) - 1 ); @@ -88,15 +119,16 @@ class PPFuzzTester { } class PPFuzzTest { - var $templates, $mainText, $title, $entryPoint; + var $templates, $mainText, $title, $entryPoint, $output; function __construct( $tester ) { + global $wgMaxSigChars; $this->parent = $tester; $this->mainText = $tester->makeInputText(); $this->title = $tester->makeTitle(); //$this->outputType = $tester->pickOutputType(); $this->entryPoint = $tester->pickEntryPoint(); - $this->nickname = $tester->makeInputText(); + $this->nickname = $tester->makeInputText( $wgMaxSigChars + 10); $this->fancySig = (bool)mt_rand( 0, 1 ); $this->templates = array(); } @@ -138,8 +170,9 @@ class PPFuzzTest { $options = new ParserOptions; $options->setTemplateCallback( array( $this, 'templateHook' ) ); - //$wgParser->startExternalParse( $this->title, $options, constant( $this->outputType ) ); - return call_user_func( array( $wgParser, $this->entryPoint ), $this->mainText, $this->title, $options ); + $options->setTimestamp( wfTimestampNow() ); + $this->output = call_user_func( array( $wgParser, $this->entryPoint ), $this->mainText, $this->title->getPrefixedText(), $options ); + return $this->output; } function getReport() { @@ -156,6 +189,7 @@ class PPFuzzTest { $s .= "[[$titleText]]: " . var_export( $template['text'], true ) . "\n"; } } + $s .= "Output: " . var_export( $this->output, true ) . "\n"; return $s; } } @@ -163,6 +197,14 @@ class PPFuzzTest { class PPFuzzUser extends User { var $ppfz_test; + function load() { + if ( $this->mDataLoaded ) { + return; + } + $this->mDataLoaded = true; + $this->loadDefaults( $this->mName ); + } + function getOption( $option, $defaultOverride = '' ) { if ( $option === 'fancysig' ) { return $this->ppfz_test->fancySig; @@ -182,10 +224,10 @@ if ( isset( $args[0] ) ) { exit( 1 ); } $test = unserialize( $testText ); - print $test->getReport(); $result = $test->execute(); - print "Test passed.\nResult: $result\n"; + print "Test passed.\n"; } else { $tester = new PPFuzzTester; + $tester->verbose = isset( $options['verbose'] ); $tester->execute(); } -- 2.20.1