<?php
+/**
+ * Preprocessor using PHP arrays
+ *
+ * @file
+ * @ingroup Parser
+ */
/**
* Differences from DOM schema:
* @ingroup Parser
*/
class Preprocessor_Hash implements Preprocessor {
+ /**
+ * @var Parser
+ */
var $parser;
+ const CACHE_VERSION = 1;
+
function __construct( $parser ) {
// Keep the parser supplied by the caller on the instance as $this->parser.
$this->parser = $parser;
}
+ /**
+ * @return PPFrame_Hash
+ */
function newFrame() {
	// The new frame is constructed with this preprocessor instance.
	$frame = new PPFrame_Hash( $this );
	return $frame;
}
+ /**
+ * @param $args
+ * @return PPCustomFrame_Hash
+ */
function newCustomFrame( $args ) {
	// Pass both this preprocessor and the caller's arguments to the custom frame.
	$customFrame = new PPCustomFrame_Hash( $this, $args );
	return $customFrame;
}
+ /**
+ * Wrap a plain PHP array of values in 'part' nodes and return them as a node array.
+ *
+ * Every entry becomes a 'part' tree holding a 'name' child and a 'value' child.
+ * An integer key yields a name carrying an 'index' attribute node; any other key
+ * yields a name holding the key as text, followed by a literal '=' text node.
+ *
+ * @param $values array: key => value pairs to convert
+ * @return PPNode_Hash_Array
+ */
+ function newPartNodeArray( $values ) {
+ 	$parts = array();
+
+ 	foreach ( $values as $key => $value ) {
+ 		$part = new PPNode_Hash_Tree( 'part' );
+ 		$name = new PPNode_Hash_Tree( 'name' );
+ 		$isNumbered = is_int( $key );
+
+ 		// Name child: index attribute for integer keys, key text otherwise
+ 		$name->addChild( $isNumbered
+ 			? new PPNode_Hash_Attr( 'index', $key )
+ 			: new PPNode_Hash_Text( $key ) );
+ 		$part->addChild( $name );
+ 		if ( !$isNumbered ) {
+ 			// Only non-integer keys get the '=' separator node
+ 			$part->addChild( new PPNode_Hash_Text( '=' ) );
+ 		}
+
+ 		$valueTree = new PPNode_Hash_Tree( 'value' );
+ 		$valueTree->addChild( new PPNode_Hash_Text( $value ) );
+ 		$part->addChild( $valueTree );
+
+ 		$parts[] = $part;
+ 	}
+
+ 	return new PPNode_Hash_Array( $parts );
+ }
+
/**
* Preprocess some wikitext and return the document tree.
* This is the ghost of Parser::replace_variables().
*
- * @param string $text The text to parse
- * @param integer flags Bitwise combination of:
+ * @param $text String: the text to parse
+ * @param $flags Integer: bitwise combination of:
* Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
* included. Default is to assume a direct page view.
*
* cache may be implemented at a later date which takes further advantage of these strict
* dependency requirements.
*
- * @private
+ * @return PPNode_Hash_Tree
*/
function preprocessToObj( $text, $flags = 0 ) {
wfProfileIn( __METHOD__ );
+ // Check cache.
+ global $wgMemc, $wgPreprocessorCacheThreshold;
+
+ $cacheable = $wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold;
+ if ( $cacheable ) {
+ wfProfileIn( __METHOD__.'-cacheable' );
+
+ $cacheKey = wfMemcKey( 'preprocess-hash', md5($text), $flags );
+ $cacheValue = $wgMemc->get( $cacheKey );
+ if ( $cacheValue ) {
+ $version = substr( $cacheValue, 0, 8 );
+ if ( intval( $version ) == self::CACHE_VERSION ) {
+ $hash = unserialize( substr( $cacheValue, 8 ) );
+ // From the cache
+ wfDebugLog( "Preprocessor",
+ "Loaded preprocessor hash from memcached (key $cacheKey)" );
+ wfProfileOut( __METHOD__.'-cacheable' );
+ wfProfileOut( __METHOD__ );
+ return $hash;
+ }
+ }
+ wfProfileIn( __METHOD__.'-cache-miss' );
+ }
+
$rules = array(
'{' => array(
'end' => '}',
// Search backwards for leading whitespace
$wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
// Search forwards for trailing whitespace
- // $wsEnd will be the position of the last space
+ // $wsEnd will be the position of the last space (or the '>' if there's none)
$wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
// Eat the line if possible
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
if ( $stack->top ) {
$part = $stack->top->getCurrentPart();
- if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) {
- // Comments abutting, no change in visual end
- $part->commentEnd = $wsEnd;
- } else {
+ if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) {
$part->visualEnd = $wsStart;
- $part->commentEnd = $endPos;
}
+ // Else comments abutting, no change in visual end
+ $part->commentEnd = $endPos;
}
$i = $endPos + 1;
$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
} else {
$attrEnd = $tagEndPos;
// Find closing tag
- if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) {
+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
+ {
$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
$i = $matches[0][1] + strlen( $matches[0][0] );
$close = $matches[0][0];
extract( $stack->getFlags() );
$i += $count;
}
- }
-
- elseif ( $found == 'line-end' ) {
+ } elseif ( $found == 'line-end' ) {
$piece = $stack->top;
// A heading must be open, otherwise \n wouldn't have been in the search list
assert( $piece->open == "\n" );
$count = $piece->count;
$equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart );
if ( $equalsLength > 0 ) {
- if ( $i - $equalsLength == $piece->startPos ) {
+ if ( $searchStart - $equalsLength == $piece->startPos ) {
// This is just a single string of equals signs on its own line
// Replicate the doHeadings behaviour /={count}(.+)={count}/
// First find out how many equals signs there really are (don't stop at 6)
// another heading. Infinite loops are avoided because the next iteration MUST
// hit the heading open case above, which unconditionally increments the
// input pointer.
- }
-
- elseif ( $found == 'open' ) {
+ } elseif ( $found == 'open' ) {
# count opening brace characters
$count = strspn( $text, $curChar, $i );
$accum->addLiteral( str_repeat( $curChar, $count ) );
}
$i += $count;
- }
-
- elseif ( $found == 'close' ) {
+ } elseif ( $found == 'close' ) {
$piece = $stack->top;
# let's check if there are enough characters for a closing brace
$maxCount = $piece->count;
# check for maximum matching characters (if there are 5 closing
# characters, we will probably need only 3 - depending on the rules)
- $matchingCount = 0;
$rule = $rules[$piece->open];
if ( $count > $rule['max'] ) {
# The specified maximum exists in the callback array, unless the caller
$titleNode->lastChild = $titleAccum->lastNode;
$element->addChild( $titleNode );
$argIndex = 1;
- foreach ( $parts as $partIndex => $part ) {
+ foreach ( $parts as $part ) {
if ( isset( $part->eqpos ) ) {
// Find equals
$lastNode = false;
} else {
$accum->addAccum( $element );
}
- }
-
- elseif ( $found == 'pipe' ) {
+ } elseif ( $found == 'pipe' ) {
$findEquals = true; // shortcut for getFlags()
$stack->addPart();
$accum =& $stack->getAccum();
++$i;
- }
-
- elseif ( $found == 'equals' ) {
+ } elseif ( $found == 'equals' ) {
$findEquals = false; // shortcut for getFlags()
$accum->addNodeWithText( 'equals', '=' );
$stack->getCurrentPart()->eqpos = $accum->lastNode;
$rootNode = new PPNode_Hash_Tree( 'root' );
$rootNode->firstChild = $stack->rootAccum->firstNode;
$rootNode->lastChild = $stack->rootAccum->lastNode;
+
+ // Cache
+ if ($cacheable) {
+ $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode );
+ $wgMemc->set( $cacheKey, $cacheValue, 86400 );
+ wfProfileOut( __METHOD__.'-cache-miss' );
+ wfProfileOut( __METHOD__.'-cacheable' );
+ wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" );
+ }
+
wfProfileOut( __METHOD__ );
return $rootNode;
}
/**
* Get the accumulator that would result if the close is not found.
+ *
+ * @return PPDAccum_Hash
*/
function breakSyntax( $openingCount = false ) {
if ( $this->open == "\n" ) {
* @ingroup Parser
*/
class PPFrame_Hash implements PPFrame {
- var $preprocessor, $parser, $title;
+
+ /**
+ * @var Parser
+ */
+ var $parser;
+
+ /**
+ * @var Preprocessor
+ */
+ var $preprocessor;
+
+ /**
+ * @var Title
+ */
+ var $title;
var $titleCache;
/**
/**
* Construct a new preprocessor frame.
- * @param Preprocessor $preprocessor The parent preprocessor
+ * @param $preprocessor Preprocessor: the parent preprocessor
*/
function __construct( $preprocessor ) {
$this->preprocessor = $preprocessor;
/**
* Create a new child frame
* $args is optionally a multi-root PPNode or array containing the template arguments
+ *
+ * @param $args PPNode_Hash_Array|array
+ * @param $title Title|false
+ *
+ * @return PPTemplateFrame_Hash
*/
function newChild( $args = false, $title = false ) {
$namedArgs = array();
$title = $this->title;
}
if ( $args !== false ) {
- $xpath = false;
if ( $args instanceof PPNode_Hash_Array ) {
$args = $args->value;
} elseif ( !is_array( $args ) ) {
return new PPTemplateFrame_Hash( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
}
+ /**
+ * @throws MWException
+ * @param $root
+ * @param $flags int
+ * @return string
+ */
function expand( $root, $flags = 0 ) {
static $expansionDepth = 0;
if ( is_string( $root ) ) {
return $root;
}
- if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
- {
+ if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) {
return '<span class="error">Node-count limit exceeded</span>';
}
- if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
+ if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) {
return '<span class="error">Expansion depth limit exceeded</span>';
}
++$expansionDepth;
if ( $contextNode->name == 'template' ) {
# Double-brace expansion
$bits = $contextNode->splitTemplate();
- if ( $flags & self::NO_TEMPLATES ) {
+ if ( $flags & PPFrame::NO_TEMPLATES ) {
$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $bits['title'], $bits['parts'] );
} else {
+ $bits['interwiki'] = $this->title->getInterwiki( );
$ret = $this->parser->braceSubstitution( $bits, $this );
if ( isset( $ret['object'] ) ) {
$newIterator = $ret['object'];
} elseif ( $contextNode->name == 'tplarg' ) {
# Triple-brace expansion
$bits = $contextNode->splitTemplate();
- if ( $flags & self::NO_ARGS ) {
+ if ( $flags & PPFrame::NO_ARGS ) {
$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $bits['title'], $bits['parts'] );
} else {
$ret = $this->parser->argSubstitution( $bits, $this );
# Remove it in HTML, pre+remove and STRIP_COMMENTS modes
if ( $this->parser->ot['html']
|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
- || ( $flags & self::STRIP_COMMENTS ) )
+ || ( $flags & PPFrame::STRIP_COMMENTS ) )
{
$out .= '';
}
# Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
# Not in RECOVER_COMMENTS mode (extractSections) though
- elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
+ elseif ( $this->parser->ot['wiki'] && ! ( $flags & PPFrame::RECOVER_COMMENTS ) ) {
$out .= $this->parser->insertStripItem( $contextNode->firstChild->value );
}
# Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
# OT_WIKI will only respect <ignore> in substed templates.
# The other output types respect it unless NO_IGNORE is set.
# extractSections() sets NO_IGNORE and so never respects it.
- if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
+ if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & PPFrame::NO_IGNORE ) ) {
$out .= $contextNode->firstChild->value;
} else {
//$out .= '';
$serial = count( $this->parser->mHeadings ) - 1;
$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
$s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] );
- $this->parser->mStripState->general->setPair( $marker, '' );
+ $this->parser->mStripState->addGeneral( $marker, '' );
$out .= $s;
} else {
# Expand in virtual stack
return $outStack[0];
}
+ /**
+ * @param $sep
+ * @param $flags
+ * @return string
+ */
function implodeWithFlags( $sep, $flags /*, ... */ ) {
$args = array_slice( func_get_args(), 2 );
/**
* Implode with no flags specified
* This previously called implodeWithFlags but has now been inlined to reduce stack depth
+ * @return string
*/
function implode( $sep /*, ... */ ) {
$args = array_slice( func_get_args(), 1 );
/**
* Makes an object that, when expand()ed, will be the same as one obtained
* with implode()
+ *
+ * @return PPNode_Hash_Array
*/
function virtualImplode( $sep /*, ... */ ) {
$args = array_slice( func_get_args(), 1 );
/**
* Virtual implode with brackets
+ *
+ * @return PPNode_Hash_Array
*/
function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
$args = array_slice( func_get_args(), 3 );
return 'frame{}';
}
+ /**
+ * @param $level bool
+ * @return array|bool|String
+ */
function getPDBK( $level = false ) {
if ( $level === false ) {
return $this->title->getPrefixedDBkey();
}
}
+ /**
+ * Return the arguments of this frame.
+ *
+ * This implementation has nothing to report and always returns an empty array.
+ *
+ * @return array
+ */
+ function getArguments() {
+ return array();
+ }
+
+ /**
+ * Return the numbered arguments of this frame.
+ *
+ * This implementation has nothing to report and always returns an empty array.
+ *
+ * @return array
+ */
+ function getNumberedArguments() {
+ return array();
+ }
+
+ /**
+ * Return the named arguments of this frame.
+ *
+ * This implementation has nothing to report and always returns an empty array.
+ *
+ * @return array
+ */
+ function getNamedArguments() {
+ return array();
+ }
+
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty() {
// Unconditionally reports the frame as empty.
return true;
}
+ /**
+ * @param $name
+ * @return bool
+ */
function getArgument( $name ) {
// $name is not consulted here: the lookup always reports "not found" as false.
return false;
}
/**
* Returns true if the infinite loop check is OK, false if a loop is detected
+ *
+ * @param $title Title
+ *
+ * @return bool
*/
function loopCheck( $title ) {
return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
/**
* Return true if the frame is a template frame
+ *
+ * @return bool
*/
function isTemplate() {
return false;
var $numberedArgs, $namedArgs, $parent;
var $numberedExpansionCache, $namedExpansionCache;
+ /**
+ * @param $preprocessor
+ * @param $parent
+ * @param $numberedArgs array
+ * @param $namedArgs array
+ * @param $title Title
+ */
function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
- $this->preprocessor = $preprocessor;
- $this->parser = $preprocessor->parser;
+ parent::__construct( $preprocessor );
+
$this->parent = $parent;
$this->numberedArgs = $numberedArgs;
$this->namedArgs = $namedArgs;
}
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty() {
	// Empty only when neither the numbered nor the named list has entries.
	$hasArguments = count( $this->numberedArgs ) || count( $this->namedArgs );
	return !$hasArguments;
}
+ /**
+ * @return array
+ */
function getArguments() {
$arguments = array();
foreach ( array_merge(
}
return $arguments;
}
-
+
+ /**
+ * @return array
+ */
function getNumberedArguments() {
$arguments = array();
foreach ( array_keys($this->numberedArgs) as $key ) {
}
return $arguments;
}
-
+
+ /**
+ * @return array
+ */
function getNamedArguments() {
$arguments = array();
foreach ( array_keys($this->namedArgs) as $key ) {
return $arguments;
}
+ /**
+ * @param $index
+ * @return array|bool
+ */
function getNumberedArgument( $index ) {
# An index with no stored argument is reported as false
if ( !isset( $this->numberedArgs[$index] ) ) {
return false;
}
# Expand on first access only; later calls reuse the cached expansion for this index
if ( !isset( $this->numberedExpansionCache[$index] ) ) {
# No trimming for unnamed arguments
# The argument is expanded by the parent frame with the STRIP_COMMENTS flag
- $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS );
+ $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], PPFrame::STRIP_COMMENTS );
}
return $this->numberedExpansionCache[$index];
}
+ /**
+ * @param $name
+ * @return bool
+ */
function getNamedArgument( $name ) {
if ( !isset( $this->namedArgs[$name] ) ) {
return false;
if ( !isset( $this->namedExpansionCache[$name] ) ) {
# Trim named arguments post-expand, for backwards compatibility
$this->namedExpansionCache[$name] = trim(
- $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS ) );
+ $this->parent->expand( $this->namedArgs[$name], PPFrame::STRIP_COMMENTS ) );
}
return $this->namedExpansionCache[$name];
}
+ /**
+ * @param $name
+ * @return array|bool
+ */
function getArgument( $name ) {
$text = $this->getNumberedArgument( $name );
if ( $text === false ) {
/**
* Return true if the frame is a template frame
+ *
+ * @return bool
*/
function isTemplate() {
return true;
var $args;
/**
* Construct a frame around a caller-supplied argument list.
*
* @param $preprocessor Passed through to the parent constructor
* @param $args Stored on the frame as $this->args (presumably an array; confirm against callers)
*/
function __construct( $preprocessor, $args ) {
- $this->preprocessor = $preprocessor;
- $this->parser = $preprocessor->parser;
+ parent::__construct( $preprocessor );
$this->args = $args;
}
return $s;
}
+ /**
+ * @return bool
+ */
function isEmpty() {
	// Empty exactly when the stored argument list has no entries.
	$argumentCount = count( $this->args );
	return $argumentCount === 0;
}
+ /**
+ * @param $index
+ * @return bool
+ */
function getArgument( $index ) {
if ( !isset( $this->args[$index] ) ) {
return false;
}
}
+ /**
+ * @param $name
+ * @param $text
+ * @return PPNode_Hash_Tree
+ */
static function newWithText( $name, $text ) {
$obj = new self( $name );
$obj->addChild( new PPNode_Hash_Text( $text ) );
}
}
+ /**
+ * @return PPNode_Hash_Array
+ */
function getChildren() {
$children = array();
for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
return $children;
}
- function getLength() { return false; }
- function item( $i ) { return false; }
+ /**
+ * Length accessor that always reports false for this node class.
+ *
+ * @return bool Always false
+ */
+ function getLength() {
+ return false;
+ }
+
+ /**
+ * Indexed accessor that always reports false for this node class.
+ *
+ * @param $i Not consulted by this implementation
+ * @return bool Always false
+ */
+ function item( $i ) {
+ return false;
+ }
+ /**
+ * @return string
+ */
function getName() {
	// Expose the node's name property unchanged.
	$nodeName = $this->name;
	return $nodeName;
}
* name PPNode name
* index String index
* value PPNode value
+ *
+ * @return array
*/
function splitArg() {
$bits = array();
/**
* Split an <ext> node into an associative array containing name, attr, inner and close
* All values in the resulting array are PPNodes. Inner and close are optional.
+ *
+ * @return array
*/
function splitExt() {
$bits = array();
/**
* Split an <h> node
+ *
+ * @return array
*/
function splitHeading() {
if ( $this->name !== 'h' ) {
/**
* Split a <template> or <tplarg> node
+ *
+ * @return array
*/
function splitTemplate() {
$parts = array();