* Factored out PPD "part" handling into its own class. Verified with differential...
author Tim Starling <tstarling@users.mediawiki.org>
Thu, 24 Jan 2008 04:29:56 +0000 (04:29 +0000)
committer Tim Starling <tstarling@users.mediawiki.org>
Thu, 24 Jan 2008 04:29:56 +0000 (04:29 +0000)
* Stabilise timestamps generated by the parser to avoid diff test false positives
* Fixed msgnw bug. Use RECOVER_ORIG.
* Fixed editintro bug. Cloning the parser in MessageCache has some side-effects that need to be corrected.
* Fixed typo in Parser_DiffTest.php
* General improvements to preprocessorFuzzTest.php
* Fixed breakage of XML output feature in Special:ExpandTemplates

includes/Parser.php
includes/ParserOptions.php
includes/Parser_DiffTest.php
includes/Preprocessor_DOM.php
maintenance/preprocessorFuzzTest.php

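diff --git a/includes/Parser.php b/includes/Parser.php
index 209b712..d5354cb 100644
--- a/includes/Parser.php
+++ b/includes/Parser.php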
@@ -89,7 +89,7 @@ class Parser
        # Persistent:
        var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
                $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix,
-               $mExtLinkBracketedRegex, $mPreprocessor, $mDefaultStripList;
+               $mExtLinkBracketedRegex, $mPreprocessor, $mDefaultStripList, $mVarCache, $mConf;
 
 
        # Cleared with clearState():
@@ -118,6 +118,7 @@ class Parser
         * @public
         */
        function __construct( $conf = array() ) {
+               $this->mConf = $conf;
                $this->mTagHooks = array();
                $this->mTransparentTagHooks = array();
                $this->mFunctionHooks = array();
@@ -126,6 +127,7 @@ class Parser
                $this->mMarkerSuffix = "-QINU\x7f";
                $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
                        '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
+               $this->mVarCache = array();
                if ( isset( $conf['preprocessorClass'] ) ) {
                        $this->mPreprocessorClass = $conf['preprocessorClass'];
                } else {
@@ -237,6 +239,7 @@ class Parser
                 * the behaviour of <nowiki> in a link.
                 */
                #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
+               # Changed to \x7f to allow XML double-parsing -- TS
                $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString();
 
                # Clear these on every parse, bug 4549
@@ -252,6 +255,11 @@ class Parser
                $this->mDefaultSort = false;
                $this->mHeadings = array();
 
+               # Fix cloning
+               if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
+                       $this->mPreprocessor = null;
+               }
+
                wfRunHooks( 'ParserClearState', array( &$this ) );
                wfProfileOut( __METHOD__ );
        }
@@ -2374,14 +2382,13 @@ class Parser
                 * Some of these require message or data lookups and can be
                 * expensive to check many times.
                 */
-               static $varCache = array();
-               if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$varCache ) ) ) {
-                       if ( isset( $varCache[$index] ) ) {
-                               return $varCache[$index];
+               if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
+                       if ( isset( $this->mVarCache[$index] ) ) {
+                               return $this->mVarCache[$index];
                        }
                }
 
-               $ts = time();
+               $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
                wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) );
 
                # Use the time zone
@@ -2408,29 +2415,29 @@ class Parser
 
                switch ( $index ) {
                        case 'currentmonth':
-                               return $varCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) );
                        case 'currentmonthname':
-                               return $varCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) );
+                               return $this->mVarCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) );
                        case 'currentmonthnamegen':
-                               return $varCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) );
+                               return $this->mVarCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) );
                        case 'currentmonthabbrev':
-                               return $varCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) );
+                               return $this->mVarCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) );
                        case 'currentday':
-                               return $varCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) );
                        case 'currentday2':
-                               return $varCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) );
                        case 'localmonth':
-                               return $varCache[$index] = $wgContLang->formatNum( $localMonth );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( $localMonth );
                        case 'localmonthname':
-                               return $varCache[$index] = $wgContLang->getMonthName( $localMonthName );
+                               return $this->mVarCache[$index] = $wgContLang->getMonthName( $localMonthName );
                        case 'localmonthnamegen':
-                               return $varCache[$index] = $wgContLang->getMonthNameGen( $localMonthName );
+                               return $this->mVarCache[$index] = $wgContLang->getMonthNameGen( $localMonthName );
                        case 'localmonthabbrev':
-                               return $varCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName );
+                               return $this->mVarCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName );
                        case 'localday':
-                               return $varCache[$index] = $wgContLang->formatNum( $localDay );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( $localDay );
                        case 'localday2':
-                               return $varCache[$index] = $wgContLang->formatNum( $localDay2 );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( $localDay2 );
                        case 'pagename':
                                return wfEscapeWikiText( $this->mTitle->getText() );
                        case 'pagenamee':
@@ -2516,51 +2523,51 @@ class Parser
                        case 'subjectspacee':
                                return( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
                        case 'currentdayname':
-                               return $varCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 );
+                               return $this->mVarCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 );
                        case 'currentyear':
-                               return $varCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true );
                        case 'currenttime':
-                               return $varCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false );
+                               return $this->mVarCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false );
                        case 'currenthour':
-                               return $varCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true );
                        case 'currentweek':
                                // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
                                // int to remove the padding
-                               return $varCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) );
                        case 'currentdow':
-                               return $varCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) );
                        case 'localdayname':
-                               return $varCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 );
+                               return $this->mVarCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 );
                        case 'localyear':
-                               return $varCache[$index] = $wgContLang->formatNum( $localYear, true );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( $localYear, true );
                        case 'localtime':
-                               return $varCache[$index] = $wgContLang->time( $localTimestamp, false, false );
+                               return $this->mVarCache[$index] = $wgContLang->time( $localTimestamp, false, false );
                        case 'localhour':
-                               return $varCache[$index] = $wgContLang->formatNum( $localHour, true );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( $localHour, true );
                        case 'localweek':
                                // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
                                // int to remove the padding
-                               return $varCache[$index] = $wgContLang->formatNum( (int)$localWeek );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( (int)$localWeek );
                        case 'localdow':
-                               return $varCache[$index] = $wgContLang->formatNum( $localDayOfWeek );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( $localDayOfWeek );
                        case 'numberofarticles':
-                               return $varCache[$index] = $wgContLang->formatNum( SiteStats::articles() );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::articles() );
                        case 'numberoffiles':
-                               return $varCache[$index] = $wgContLang->formatNum( SiteStats::images() );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::images() );
                        case 'numberofusers':
-                               return $varCache[$index] = $wgContLang->formatNum( SiteStats::users() );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::users() );
                        case 'numberofpages':
-                               return $varCache[$index] = $wgContLang->formatNum( SiteStats::pages() );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::pages() );
                        case 'numberofadmins':
-                               return $varCache[$index] = $wgContLang->formatNum( SiteStats::admins() );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::admins() );
                        case 'numberofedits':
-                               return $varCache[$index] = $wgContLang->formatNum( SiteStats::edits() );
+                               return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::edits() );
                        case 'currenttimestamp':
-                               return $varCache[$index] = wfTimestampNow();
+                               return $this->mVarCache[$index] = wfTimestamp( TS_MW, $ts );
                        case 'localtimestamp':
-                               return $varCache[$index] = $localTimestamp;
+                               return $this->mVarCache[$index] = $localTimestamp;
                        case 'currentversion':
-                               return $varCache[$index] = SpecialVersion::getVersion();
+                               return $this->mVarCache[$index] = SpecialVersion::getVersion();
                        case 'sitename':
                                return $wgSitename;
                        case 'server':
@@ -2576,7 +2583,7 @@ class Parser
                                return $wgContLanguageCode;
                        default:
                                $ret = null;
-                               if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) )
+                               if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret ) ) )
                                        return $ret;
                                else
                                        return null;
@@ -2936,7 +2943,9 @@ class Parser
                        # Clean up argument array
                        $newFrame = $frame->newChild( $args, $title );
 
-                       if ( $titleText !== false && $newFrame->isEmpty() ) {
+                       if ( $nowiki ) {
+                               $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
+                       } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
                                # Expansion is eligible for the empty-frame cache
                                if ( isset( $this->mTplExpandCache[$titleText] ) ) {
                                        $text = $this->mTplExpandCache[$titleText];
@@ -2949,6 +2958,10 @@ class Parser
                                $text = $newFrame->expand( $text );
                        }
                }
+               if ( $isLocalObj && $nowiki ) {
+                       $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
+                       $isLocalObj = false;
+               }
 
                # Replace raw HTML by a placeholder
                # Add a blank line preceding, to prevent it from mucking up
@@ -3635,7 +3648,7 @@ class Parser
                        $oldtz = getenv( 'TZ' );
                        putenv( 'TZ='.$wgLocaltimezone );
                }
-               $d = $wgContLang->timeanddate( date( 'YmdHis' ), false, false) .
+               $d = $wgContLang->timeanddate( $this->mOptions->getTimestamp(), false, false) .
                  ' (' . date( 'T' ) . ')';
                if ( isset( $wgLocaltimezone ) ) {
                        putenv( 'TZ='.$oldtz );
@@ -4776,6 +4789,9 @@ class Parser
         */
        function testSrvus( $text, $title, $options, $outputType = self::OT_HTML ) {
                $this->clearState();
+               if ( ! ( $title instanceof Title ) ) {
+                       $title = Title::newFromText( $title );
+               }
                $this->mTitle = $title;
                $this->mOptions = $options;
                $this->setOutputType( $outputType );
@@ -4787,10 +4803,16 @@ class Parser
 
        function testPst( $text, $title, $options ) {
                global $wgUser;
+               if ( ! ( $title instanceof Title ) ) {
+                       $title = Title::newFromText( $title );
+               }
                return $this->preSaveTransform( $text, $title, $wgUser, $options );
        }
 
        function testPreprocess( $text, $title, $options ) {
+               if ( ! ( $title instanceof Title ) ) {
+                       $title = Title::newFromText( $title );
+               }
                return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
        }
 }
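diff --git a/includes/ParserOptions.php b/includes/ParserOptions.php
index 5bab3eb..996bba2 100644
--- a/includes/ParserOptions.php
+++ b/includes/ParserOptions.php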
@@ -26,6 +26,7 @@ class ParserOptions
        var $mRemoveComments;            # Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS
        var $mTemplateCallback;          # Callback for template fetching
        var $mEnableLimitReport;         # Enable limit report in an HTML comment on output
+       var $mTimestamp;                 # Timestamp used for {{CURRENTDAY}} etc.
 
        var $mUser;                      # Stored user object, just used to initialise the skin
 
@@ -60,6 +61,13 @@ class ParserOptions
                return $this->mDateFormat;
        }
 
+       function getTimestamp() { 
+               if ( !isset( $this->mTimestamp ) ) {
+                       $this->mTimestamp = wfTimestampNow();
+               }
+               return $this->mTimestamp; 
+       }
+
        function setUseTeX( $x )                    { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseDynamicDates( $x )           { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x )            { return wfSetVar( $this->mInterwikiMagic, $x ); }
@@ -78,6 +86,7 @@ class ParserOptions
        function setRemoveComments( $x )            { return wfSetVar( $this->mRemoveComments, $x ); }
        function setTemplateCallback( $x )          { return wfSetVar( $this->mTemplateCallback, $x ); }
        function enableLimitReport( $x = true )     { return wfSetVar( $this->mEnableLimitReport, $x ); }
+       function setTimestamp( $x )                 { return wfSetVar( $this->mTimestamp, $x ); }
 
        function __construct( $user = null ) {
                $this->initialiseFromUser( $user );
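diff --git a/includes/Parser_DiffTest.php b/includes/Parser_DiffTest.php
index e4a9362..754434a 100644
--- a/includes/Parser_DiffTest.php
+++ b/includes/Parser_DiffTest.php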
@@ -18,7 +18,7 @@ class Parser_DiffTest
                foreach ( $this->conf['parsers'] as $i => $parserConf ) {
                        if ( !is_array( $parserConf ) ) {
                                $class = $parserConf;
-                               $parserconf = array( 'class' => $parserConf );
+                               $parserConf = array( 'class' => $parserConf );
                        } else {
                                $class = $parserConf['class'];
                        }
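diff --git a/includes/Preprocessor_DOM.php b/includes/Preprocessor_DOM.php
index d7701c0..3a712b0 100644
--- a/includes/Preprocessor_DOM.php
+++ b/includes/Preprocessor_DOM.php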
@@ -1,16 +1,37 @@
 <?php
 
 class Preprocessor_DOM implements Preprocessor {
-       var $parser;
+       var $parser, $memoryLimit;
 
        function __construct( $parser ) {
                $this->parser = $parser;
+               $mem = ini_get( 'memory_limit' );
+               $this->memoryLimit = false;
+               if ( strval( $mem ) !== '' && $mem != -1 ) {
+                       if ( preg_match( '/^\d+$/', $mem ) ) {
+                               $this->memoryLimit = $mem;
+                       } elseif ( preg_match( '/^(\d+)M$/i', $mem, $m ) ) {
+                               $this->memoryLimit = $m[1] * 1048576;
+                       }
+               }
        }
 
        function newFrame() {
                return new PPFrame_DOM( $this );
        }
 
+       function memCheck() {
+               if ( $this->memoryLimit === false ) {
+                       return;
+               }
+               $usage = memory_get_usage();
+               if ( $usage > $this->memoryLimit * 0.9 ) {
+                       $limit = intval( $this->memoryLimit * 0.9 / 1048576 + 0.5 );
+                       throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
+               }
+               return $usage <= $this->memoryLimit * 0.8;
+       }
+
        /**
         * Preprocess some wikitext and return the document tree.
         * This is the ghost of Parser::replace_variables(). 
@@ -78,11 +99,11 @@ class Preprocessor_DOM implements Preprocessor {
        
                $stack = new PPDStack;
 
-               $searchBase = '[{<';
+               $searchBase = '[{<'; #}
                $revText = strrev( $text ); // For fast reverse searches
 
                $i = 0;                     # Input pointer, starts out pointing to a pseudo-newline before the start
-               $accum =& $stack->getAccum();   # Current text accumulator
+               $accum =& $stack->getAccum();   # Current accumulator
                $accum = '<root>';
                $findEquals = false;            # True to find equals signs in arguments
                $findPipe = false;              # True to take notice of pipe characters
@@ -93,6 +114,8 @@ class Preprocessor_DOM implements Preprocessor {
                $fakeLineStart = true;     # Do a line-start run without outputting an LF character
 
                while ( true ) {
+                       if ( ! ($i % 10) ) $this->memCheck();
+
                        if ( $findOnlyinclude ) {
                                // Ignore all input up to the next <onlyinclude>
                                $startPos = strpos( $text, '<onlyinclude>', $i );
@@ -241,6 +264,17 @@ class Preprocessor_DOM implements Preprocessor {
                                                        $endPos += 2;
                                                }
 
+                                               /*
+                                               if ( $stack->top ) {
+                                                       if ( $stack->top->commentEndPos !== false && $stack->top->commentEndPos == $wsStart ) {
+                                                               // Comments abutting, no change in visual end
+                                                               $stack->top->commentEndPos = $wsEnd;
+                                                       } else {
+                                                               $stack->top->visualEndPos = $wsStart;
+                                                               $stack->top->commentEndPos = $wsEnd;
+                                                       }
+                                               }
+                                                */
                                                $i = $endPos + 1;
                                                $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
                                                $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
@@ -326,7 +360,7 @@ class Preprocessor_DOM implements Preprocessor {
                                        $piece = array(
                                                'open' => "\n",
                                                'close' => "\n",
-                                               'parts' => array( str_repeat( '=', $count ) ),
+                                               'parts' => array( new PPDPart( str_repeat( '=', $count ) ) ),
                                                'startPos' => $i,
                                                'count' => $count );
                                        $stack->push( $piece );
@@ -395,8 +429,6 @@ class Preprocessor_DOM implements Preprocessor {
                                                'open' => $curChar,
                                                'close' => $rule['end'],
                                                'count' => $count,
-                                               'parts' => array( '' ),
-                                               'eqpos' => array(),
                                                'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
                                        );
 
@@ -444,14 +476,12 @@ class Preprocessor_DOM implements Preprocessor {
                                $name = $rule['names'][$matchingCount];
                                if ( $name === null ) {
                                        // No element, just literal text
-                                       $element = str_repeat( $piece->open, $matchingCount ) .
-                                               implode( '|', $piece->parts ) . 
-                                               str_repeat( $rule['end'], $matchingCount );
+                                       $element = $piece->breakSyntax( $matchingCount ) . str_repeat( $rule['end'], $matchingCount );
                                } else {
                                        # Create XML element
                                        # Note: $parts is already XML, does not need to be encoded further
                                        $parts = $piece->parts;
-                                       $title = $parts[0];
+                                       $title = $parts[0]->out;
                                        unset( $parts[0] );
 
                                        # The invocation is at the start of the line if lineStart is set in 
@@ -466,13 +496,12 @@ class Preprocessor_DOM implements Preprocessor {
                                        $element .= "<title>$title</title>";
                                        $argIndex = 1;
                                        foreach ( $parts as $partIndex => $part ) {
-                                               if ( isset( $piece->eqpos[$partIndex] ) ) {
-                                                       $eqpos = $piece->eqpos[$partIndex];
-                                                       $argName = substr( $part, 0, $eqpos );
-                                                       $argValue = substr( $part, $eqpos + 1 );
+                                               if ( isset( $part->eqpos ) ) {
+                                                       $argName = substr( $part->out, 0, $part->eqpos );
+                                                       $argValue = substr( $part->out, $part->eqpos + 1 );
                                                        $element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
                                                } else {
-                                                       $element .= "<part><name index=\"$argIndex\" /><value>$part</value></part>";
+                                                       $element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>";
                                                        $argIndex++;
                                                }
                                        }
@@ -488,9 +517,8 @@ class Preprocessor_DOM implements Preprocessor {
 
                                # Re-add the old stack element if it still has unmatched opening characters remaining
                                if ($matchingCount < $piece->count) {
-                                       $piece->parts = array( '' );
+                                       $piece->parts = array( new PPDPart );
                                        $piece->count -= $matchingCount;
-                                       $piece->eqpos = array();
                                        # do we still qualify for any callback with remaining count?
                                        $names = $rules[$piece->open]['names'];
                                        $skippedBraces = 0;
@@ -515,15 +543,14 @@ class Preprocessor_DOM implements Preprocessor {
                        
                        elseif ( $found == 'pipe' ) {
                                $findEquals = true; // shortcut for getFlags()
-                               $stack->top->addPart();
+                               $stack->addPart();
                                $accum =& $stack->getAccum();
                                ++$i;
                        }
                        
                        elseif ( $found == 'equals' ) {
                                $findEquals = false; // shortcut for getFlags()
-                               $partsCount = count( $stack->top->parts );
-                               $stack->top->eqpos[$partsCount - 1] = strlen( $accum );
+                               $stack->getCurrentPart()->eqpos = strlen( $accum );
                                $accum .= '=';
                                ++$i;
                        }
@@ -531,14 +558,10 @@ class Preprocessor_DOM implements Preprocessor {
 
                # Output any remaining unclosed brackets
                foreach ( $stack->stack as $piece ) {
-                       if ( $piece->open == "\n" ) {
-                               $stack->topAccum .= $piece->parts[0];
-                       } else {
-                               $stack->topAccum .= str_repeat( $piece->open, $piece->count ) . implode( '|', $piece->parts );
-                       }
+                       $stack->rootAccum .= $piece->breakSyntax();
                }
-               $stack->topAccum .= '</root>';
-               $xml = $stack->topAccum;
+               $stack->rootAccum .= '</root>';
+               $xml = $stack->rootAccum;
 
                wfProfileOut( __METHOD__.'-makexml' );
                wfProfileIn( __METHOD__.'-loadXML' );
@@ -561,6 +584,156 @@ class Preprocessor_DOM implements Preprocessor {
        }
 }
 
+/**
+ * Stack class to help Preprocessor::preprocessToObj(). $accum always aliases the
+ * string being appended to: the top element's current part, or $rootAccum.
+ */
+class PPDStack {
+       var $stack, $rootAccum, $top;
+       var $out, $accum; // $accum used to be created implicitly; $out is unused here
+       static $false = false;
+
+       function __construct() {
+               $this->stack = array();
+               $this->top = false;
+               $this->rootAccum = '';
+               $this->accum =& $this->rootAccum;
+       }
+
+       // Number of elements currently on the stack
+       function count() {
+               return count( $this->stack );
+       }
+
+       // Current accumulator, returned by reference so callers can append to it
+       function &getAccum() {
+               return $this->accum;
+       }
+
+       // Current part of the top element, or false if the stack is empty
+       function getCurrentPart() {
+               if ( $this->top === false ) {
+                       return false;
+               }
+               return $this->top->getCurrentPart();
+       }
+
+       // Push an element (or an array of element properties) and retarget $accum
+       function push( $data ) {
+               if ( !( $data instanceof PPDStackElement ) ) {
+                       $data = new PPDStackElement( $data );
+               }
+               $this->stack[] = $data;
+               $this->top = $this->stack[ count( $this->stack ) - 1 ];
+               $this->accum =& $this->top->getAccum();
+       }
+
+       // Remove and return the top element; throws MWException if the stack is empty
+       function pop() {
+               if ( !count( $this->stack ) ) {
+                       throw new MWException( __METHOD__.': no elements remaining' );
+               }
+               $temp = array_pop( $this->stack );
+               if ( count( $this->stack ) ) {
+                       $this->top = $this->stack[ count( $this->stack ) - 1 ];
+                       $this->accum =& $this->top->getAccum();
+               } else {
+                       $this->top = self::$false;
+                       $this->accum =& $this->rootAccum;
+               }
+               return $temp;
+       }
+
+       // Start a new part on the top element and retarget $accum to it
+       function addPart( $s = '' ) {
+               $this->top->addPart( $s );
+               $this->accum =& $this->top->getAccum();
+       }
+
+       // Flags of the top element, or all-false when the stack is empty
+       function getFlags() {
+               if ( count( $this->stack ) ) {
+                       return $this->top->getFlags();
+               }
+               return array( 'findEquals' => false, 'findPipe' => false, 'inHeading' => false );
+       }
+}
+
+class PPDStackElement {
+       var $open,                      // Opening character (\n for heading)
+               $close,             // Matching closing character
+               $count,             // Number of opening characters found (number of "=" for heading)
+               $parts,             // Array of PPDPart objects describing pipe-separated parts.
+               $lineStart;         // True if the open char appeared at the start of the input line. Not set for headings.
+
+       // Starts with one empty part, then copies each $data entry onto the same-named property
+       function __construct( $data = array() ) {
+               $this->parts = array( new PPDPart );
+               foreach ( $data as $field => $fieldValue ) {
+                       $this->$field = $fieldValue;
+               }
+       }
+
+       // Output string of the last part, returned by reference
+       function &getAccum() {
+               $last = count( $this->parts ) - 1;
+               return $this->parts[$last]->out;
+       }
+
+       // Append a new part whose output starts as $s
+       function addPart( $s = '' ) {
+               $this->parts[] = new PPDPart( $s );
+       }
+
+       // The last (most recently added) PPDPart
+       function getCurrentPart() {
+               $last = count( $this->parts ) - 1;
+               return $this->parts[$last];
+       }
+
+       // inHeading: open is a newline. findPipe: open is neither a newline nor "[".
+       // findEquals: findPipe, more than one part, and the last part has no eqpos yet.
+       function getFlags() {
+               $partCount = count( $this->parts );
+               $inHeading = ( $this->open == "\n" );
+               $findPipe = !$inHeading && $this->open != '[';
+               $findEquals = $findPipe && $partCount > 1
+                       && !isset( $this->parts[$partCount - 1]->eqpos );
+               return array( 'findPipe' => $findPipe, 'findEquals' => $findEquals, 'inHeading' => $inHeading );
+       }
+
+       /**
+        * Get the output string that would result if the close is not found.
+        * $openingCount, when not false, replaces $this->count as the number of open characters.
+        */
+       function breakSyntax( $openingCount = false ) {
+               if ( $this->open == "\n" ) {
+                       return $this->parts[0]->out;
+               }
+               if ( $openingCount === false ) {
+                       $openingCount = $this->count;
+               }
+               $outs = array();
+               foreach ( $this->parts as $part ) {
+                       $outs[] = $part->out;
+               }
+               return str_repeat( $this->open, $openingCount ) . implode( '|', $outs );
+       }
+}
+
+class PPDPart {
+       var $out; // Output accumulator string, initialised from the constructor argument
+
+       // Optional member variables, created on the object only when they are needed:
+       //   eqpos        Position of equals sign in output accumulator
+       //   commentEnd   Past-the-end input pointer for the last comment encountered
+       //   visualEnd    Past-the-end input pointer for the end of the accumulator minus comments
+
+       function __construct( $out = '' ) {
+               $this->out = $out;
+       }
+}
+
 /**
  * An expansion frame, used as a context to expand the result of preprocessToDom()
  */
@@ -1037,91 +1210,6 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
        }
 }
 
-/**
- * Stack class to help Parser::preprocessToDom()
- */
-class PPDStack {
-       var $stack, $topAccum, $top;
-
-       function __construct() {
-               $this->stack = array();
-               $this->topAccum = '';
-               $this->top = false;
-       }
-
-       function &getAccum() {
-               if ( count( $this->stack ) ) {
-                       return $this->top->getAccum();
-               } else {
-                       return $this->topAccum;
-               }
-       }
-
-       function push( $data ) {
-               if ( $data instanceof PPDStackElement ) {
-                       $this->stack[] = $data;
-               } else {
-                       $this->stack[] = new PPDStackElement( $data );
-               }
-               $this->top =& $this->stack[ count( $this->stack ) - 1 ];
-       }
-
-       function pop() {
-               if ( !count( $this->stack ) ) {
-                       throw new MWException( __METHOD__.': no elements remaining' );
-               }
-               $temp = array_pop( $this->stack );
-               if ( count( $this->stack ) ) {
-                       $this->top =& $this->stack[ count( $this->stack ) - 1 ];
-               } else {
-                       $this->top = false;
-               }
-       }
-
-       function getFlags() {
-               if ( !count( $this->stack ) ) {
-                       return array( 
-                               'findEquals' => false, 
-                               'findPipe' => false,
-                               'inHeading' => false,
-                       );
-               } else {
-                       return $this->top->getFlags();
-               }
-       }
-}
-
-class PPDStackElement {
-       var $open, $close, $count, $parts, $eqpos, $lineStart;
-
-       function __construct( $data = array() ) {
-               $this->parts = array( '' );
-               $this->eqpos = array();
-
-               foreach ( $data as $name => $value ) {
-                       $this->$name = $value;
-               }
-       }
-
-       function &getAccum() {
-               return $this->parts[count($this->parts) - 1];
-       }
-
-       function addPart( $s = '' ) {
-               $this->parts[] = $s;
-       }
-
-       function getFlags() {
-               $partCount = count( $this->parts );
-               $findPipe = $this->open != "\n" && $this->open != '[';
-               return array(
-                       'findPipe' => $findPipe,
-                       'findEquals' => $findPipe && $partCount > 1 && !isset( $this->eqpos[$partCount - 1] ),
-                       'inHeading' => $this->open == "\n",
-               );
-       }
-}
-
 class PPNode_DOM implements PPNode {
        var $node;
 
@@ -1143,7 +1231,7 @@ class PPNode_DOM implements PPNode {
                                $s .= $node->ownerDocument->saveXML( $node );
                        }
                } else {
-                       $s = $this->node->ownerDocument->saveXML( $node );
+                       $s = $this->node->ownerDocument->saveXML( $this->node );
                }
                return $s;
        }
index e3cd405..d814c4f 100644 (file)
@@ -6,7 +6,7 @@ $wgHooks['BeforeParserFetchTemplateAndtitle'][] = 'PPFuzzTester::templateHook';
 
 class PPFuzzTester {
        var $hairs = array(
-               '[[', ']]', '{{', '}}', '{{{', '}}}', 
+               '[[', ']]', '{{', '{{', '}}', '}}', '{{{', '}}}', 
                '<', '>', '<nowiki', '<gallery', '</nowiki>', '</gallery>', '<nOwIkI>', '</NoWiKi>',
                '<!--' , '-->',
                "\n==", "==\n",
@@ -23,6 +23,7 @@ class PPFuzzTester {
        var $maxTemplates = 5;
        //var $outputTypes = array( 'OT_HTML', 'OT_WIKI', 'OT_PREPROCESS' );
        var $entryPoints = array( 'testSrvus', 'testPst', 'testPreprocess' );
+       var $verbose = false;
        static $currentTest = false;
 
        function execute() {
@@ -33,10 +34,14 @@ class PPFuzzTester {
                        echo "Unable to create 'results' directory\n";
                        exit( 1 );
                }
-               for ( $i = 0; true; $i++ ) {
+               $overallStart = microtime( true );
+               $reportInterval = 1000;
+               for ( $i = 1; true; $i++ ) {
+                       $t = -microtime( true );
                        try {
                                self::$currentTest = new PPFuzzTest( $this );
                                self::$currentTest->execute();
+                               $passed = 'passed';
                        } catch ( MWException $e ) {
                                $testReport = self::$currentTest->getReport();
                                $exceptionReport = $e->getText();
@@ -45,8 +50,30 @@ class PPFuzzTester {
                                file_put_contents( "results/ppft-$hash.fail", 
                                        "Input:\n$testReport\n\nException report:\n$exceptionReport\n" );
                                print "Test $hash failed\n";
+                               $passed = 'failed';
                        }
-                       if ( $i % 1000 == 0 ) {
+                       $t += microtime( true );
+
+                       if ( $this->verbose ) {
+                               printf( "Test $passed in %.3f seconds\n", $t );
+                               print self::$currentTest->getReport();
+                       }
+
+                       $reportMetric = ( microtime( true ) - $overallStart ) / $i * $reportInterval;
+                       if ( $reportMetric > 25 ) {
+                               if ( substr( $reportInterval, 0, 1 ) === '1' ) {
+                                       $reportInterval /= 2;
+                               } else {
+                                       $reportInterval /= 5;
+                               }
+                       } elseif ( $reportMetric < 4 ) {
+                               if ( substr( $reportInterval, 0, 1 ) === '1' ) {
+                                       $reportInterval *= 5;
+                               } else {
+                                       $reportInterval *= 2;
+                               }
+                       }
+                       if ( $i % $reportInterval == 0 ) {
                                print "$i tests done\n";
                                /*
                                $testReport = self::$currentTest->getReport();
@@ -54,10 +81,14 @@ class PPFuzzTester {
                                file_put_contents( $filename, "Input:\n$testReport\n" );*/
                        }
                }
+               wfLogProfilingData();
        }
 
-       function makeInputText() {
-               $length = mt_rand( $this->minLength, $this->maxLength );
+       function makeInputText( $max = false ) {
+               if ( $max === false ) {
+                       $max = $this->maxLength;
+               }
+               $length = mt_rand( $this->minLength, $max );
                $s = '';
                for ( $i = 0; $i < $length; $i++ ) {
                        $hairIndex = mt_rand( 0, count( $this->hairs ) - 1 );
@@ -88,15 +119,16 @@ class PPFuzzTester {
 }
 
 class PPFuzzTest {
-       var $templates, $mainText, $title, $entryPoint;
+       var $templates, $mainText, $title, $entryPoint, $output;
 
        function __construct( $tester ) {
+               global $wgMaxSigChars;
                $this->parent = $tester;
                $this->mainText = $tester->makeInputText();
                $this->title = $tester->makeTitle();
                //$this->outputType = $tester->pickOutputType();
                $this->entryPoint = $tester->pickEntryPoint();
-               $this->nickname = $tester->makeInputText();
+               $this->nickname = $tester->makeInputText( $wgMaxSigChars + 10);
                $this->fancySig = (bool)mt_rand( 0, 1 );
                $this->templates = array();
        }
@@ -138,8 +170,9 @@ class PPFuzzTest {
 
                $options = new ParserOptions;
                $options->setTemplateCallback( array( $this, 'templateHook' ) );
-               //$wgParser->startExternalParse( $this->title, $options, constant( $this->outputType ) );
-               return call_user_func( array( $wgParser, $this->entryPoint ), $this->mainText, $this->title, $options );
+               $options->setTimestamp( wfTimestampNow() );
+               $this->output = call_user_func( array( $wgParser, $this->entryPoint ), $this->mainText, $this->title->getPrefixedText(), $options );
+               return $this->output;
        }
 
        function getReport() {
@@ -156,6 +189,7 @@ class PPFuzzTest {
                                $s .= "[[$titleText]]: " . var_export( $template['text'], true ) . "\n";
                        }
                }
+               $s .= "Output: " . var_export( $this->output, true ) . "\n";
                return $s;
        }
 }
@@ -163,6 +197,14 @@ class PPFuzzTest {
 class PPFuzzUser extends User {
        var $ppfz_test;
 
+       // On the first call, mark the data as loaded and call loadDefaults(); later calls do nothing
+       function load() {
+               if ( !$this->mDataLoaded ) {
+                       $this->mDataLoaded = true;
+                       $this->loadDefaults( $this->mName );
+               }
+       }
+
        function getOption( $option, $defaultOverride = '' ) {
                if ( $option === 'fancysig' ) {
                        return $this->ppfz_test->fancySig;
@@ -182,10 +224,10 @@ if ( isset( $args[0] ) ) {
                exit( 1 );
        }
        $test = unserialize( $testText );
-       print $test->getReport();
        $result = $test->execute();
-       print "Test passed.\nResult: $result\n";
+       print "Test passed.\n";
 } else {
        $tester = new PPFuzzTester;
+       $tester->verbose = isset( $options['verbose'] );
        $tester->execute();
 }