- wfProfileIn( __METHOD__ );
- wfProfileIn( __METHOD__.'-makexml' );
-
- static $msgRules, $normalRules, $inclusionSupertags, $nonInclusionSupertags;
- if ( !$msgRules ) {
- $msgRules = array(
- '{' => array(
- 'end' => '}',
- 'names' => array(
- 2 => 'template',
- ),
- 'min' => 2,
- 'max' => 2,
- ),
- '[' => array(
- 'end' => ']',
- 'names' => array( 2 => null ),
- 'min' => 2,
- 'max' => 2,
- )
- );
- $normalRules = array(
- '{' => array(
- 'end' => '}',
- 'names' => array(
- 2 => 'template',
- 3 => 'tplarg',
- ),
- 'min' => 2,
- 'max' => 3,
- ),
- '[' => array(
- 'end' => ']',
- 'names' => array( 2 => null ),
- 'min' => 2,
- 'max' => 2,
- )
- );
- }
- if ( $this->ot['msg'] ) {
- $rules = $msgRules;
- } else {
- $rules = $normalRules;
- }
- $forInclusion = $flags & self::PTD_FOR_INCLUSION;
-
- $xmlishElements = $this->getStripList();
- $enableOnlyinclude = false;
- if ( $forInclusion ) {
- $ignoredTags = array( 'includeonly', '/includeonly' );
- $ignoredElements = array( 'noinclude' );
- $xmlishElements[] = 'noinclude';
- if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
- $enableOnlyinclude = true;
- }
- } else {
- $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
- $ignoredElements = array( 'includeonly' );
- $xmlishElements[] = 'includeonly';
- }
- $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
-
- // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
- $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
-
- $stack = array(); # Stack of unclosed parentheses
- $stackIndex = -1; # Stack read pointer
-
- $searchBase = implode( '', array_keys( $rules ) ) . '<';
- $revText = strrev( $text ); // For fast reverse searches
-
- $i = -1; # Input pointer, starts out pointing to a pseudo-newline before the start
- $topAccum = '<root>'; # Top level text accumulator
- $accum =& $topAccum; # Current text accumulator
- $findEquals = false; # True to find equals signs in arguments
- $findHeading = false; # True to look at LF characters for possible headings
- $findPipe = false; # True to take notice of pipe characters
- $headingIndex = 1;
- $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i
- $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
-
- if ( $enableOnlyinclude ) {
- $i = 0;
- }
-
- while ( true ) {
- if ( $findOnlyinclude ) {
- // Ignore all input up to the next <onlyinclude>
- $startPos = strpos( $text, '<onlyinclude>', $i );
- if ( $startPos === false ) {
- // Ignored section runs to the end
- $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
- break;
- }
- $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
- $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
- $i = $tagEndPos;
- $findOnlyinclude = false;
- }
-
- if ( $i == -1 ) {
- $found = 'line-start';
- $curChar = '';
- } else {
- # Find next opening brace, closing brace or pipe
- $search = $searchBase;
- if ( $stackIndex == -1 ) {
- $currentClosing = '';
- // Look for headings only at the top stack level
- // Among other things, this resolves the ambiguity between =
- // for headings and = for template arguments
- $search .= "\n";
- } else {
- $currentClosing = $stack[$stackIndex]['close'];
- $search .= $currentClosing;
- }
- if ( $findPipe ) {
- $search .= '|';
- }
- if ( $findEquals ) {
- $search .= '=';
- }
- $rule = null;
- # Output literal section, advance input counter
- $literalLength = strcspn( $text, $search, $i );
- if ( $literalLength > 0 ) {
- $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
- $i += $literalLength;
- }
- if ( $i >= strlen( $text ) ) {
- if ( $currentClosing == "\n" ) {
- // Do a past-the-end run to finish off the heading
- $curChar = '';
- $found = 'line-end';
- } else {
- # All done
- break;
- }
- } else {
- $curChar = $text[$i];
- if ( $curChar == '|' ) {
- $found = 'pipe';
- } elseif ( $curChar == '=' ) {
- $found = 'equals';
- } elseif ( $curChar == '<' ) {
- $found = 'angle';
- } elseif ( $curChar == "\n" ) {
- if ( $stackIndex == -1 ) {
- $found = 'line-start';
- } else {
- $found = 'line-end';
- }
- } elseif ( $curChar == $currentClosing ) {
- $found = 'close';
- } elseif ( isset( $rules[$curChar] ) ) {
- $found = 'open';
- $rule = $rules[$curChar];
- } else {
- # Some versions of PHP have a strcspn which stops on null characters
- # Ignore and continue
- ++$i;
- continue;
- }
- }
- }
-
- if ( $found == 'angle' ) {
- $matches = false;
- // Handle </onlyinclude>
- if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) {
- $findOnlyinclude = true;
- continue;
- }
-
- // Determine element name
- if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
- // Element name missing or not listed
- $accum .= '<';
- ++$i;
- continue;
- }
- // Handle comments
- if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
- // To avoid leaving blank lines, when a comment is both preceded
- // and followed by a newline (ignoring spaces), trim leading and
- // trailing spaces and one of the newlines.
-
- // Find the end
- $endPos = strpos( $text, '-->', $i + 4 );
- if ( $endPos === false ) {
- // Unclosed comment in input, runs to end
- $inner = substr( $text, $i );
- $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
- $i = strlen( $text );
- } else {
- // Search backwards for leading whitespace
- $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
- // Search forwards for trailing whitespace
- // $wsEnd will be the position of the last space
- $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
- // Eat the line if possible
- if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
- && substr( $text, $wsEnd + 1, 1 ) == "\n" )
- {
- $startPos = $wsStart;
- $endPos = $wsEnd + 1;
- // Remove leading whitespace from the end of the accumulator
- // Sanity check first though
- $wsLength = $i - $wsStart;
- if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
- $accum = substr( $accum, 0, -$wsLength );
- }
- } else {
- // No line to eat, just take the comment itself
- $startPos = $i;
- $endPos += 2;
- }
-
- $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
- $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
- $i = $endPos + 1;
- }
- continue;
- }
- $name = $matches[1];
- $attrStart = $i + strlen( $name ) + 1;
-
- // Find end of tag
- $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
- if ( $tagEndPos === false ) {
- // Infinite backtrack
- // Disable tag search to prevent worst-case O(N^2) performance
- $noMoreGT = true;
- $accum .= '<';
- ++$i;
- continue;
- }
-
- // Handle ignored tags
- if ( in_array( $name, $ignoredTags ) ) {
- $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>';
- $i = $tagEndPos + 1;
- continue;
- }
-
- $tagStartPos = $i;
- if ( $text[$tagEndPos-1] == '/' ) {
- $attrEnd = $tagEndPos - 1;
- $inner = null;
- $i = $tagEndPos + 1;
- $close = '';
- } else {
- $attrEnd = $tagEndPos;
- // Find closing tag
- if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) {
- $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
- $i = $matches[0][1] + strlen( $matches[0][0] );
- $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
- } else {
- // No end tag -- let it run out to the end of the text.
- $inner = substr( $text, $tagEndPos + 1 );
- $i = strlen( $text );
- $close = '';
- }
- }
- // <includeonly> and <noinclude> just become <ignore> tags
- if ( in_array( $name, $ignoredElements ) ) {
- $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
- . '</ignore>';
- continue;
- }
-
- $accum .= '<ext>';
- if ( $attrEnd <= $attrStart ) {
- $attr = '';
- } else {
- $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
- }
- $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
- // Note that the attr element contains the whitespace between name and attribute,
- // this is necessary for precise reconstruction during pre-save transform.
- '<attr>' . htmlspecialchars( $attr ) . '</attr>';
- if ( $inner !== null ) {
- $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
- }
- $accum .= $close . '</ext>';
- }
-
- elseif ( $found == 'line-start' ) {
- // Is this the start of a heading?
- // Line break belongs before the heading element in any case
- $accum .= $curChar;
- $i++;
-
- $count = strspn( $text, '=', $i, 6 );
- if ( $count > 0 ) {
- $piece = array(
- 'open' => "\n",
- 'close' => "\n",
- 'parts' => array( str_repeat( '=', $count ) ),
- 'count' => $count );
- $stack[++$stackIndex] = $piece;
- $i += $count;
- $accum =& $stack[$stackIndex]['parts'][0];
- $findPipe = false;
- }
- }
-
- elseif ( $found == 'line-end' ) {
- $piece = $stack[$stackIndex];
- // A heading must be open, otherwise \n wouldn't have been in the search list
- assert( $piece['open'] == "\n" );
- assert( $stackIndex == 0 );
- // Search back through the input to see if it has a proper close
- // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
- $m = false;
- $count = $piece['count'];
- if ( preg_match( "/\s*(={{$count}})/A", $revText, $m, 0, strlen( $text ) - $i ) ) {
- // Found match, output <h>
- $count = min( strlen( $m[1] ), $count );
- $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
- $headingIndex++;
- } else {
- // No match, no <h>, just pass down the inner text
- $element = $accum;
- }
- // Unwind the stack
- // Headings can only occur on the top level, so this is a bit simpler than the
- // generic stack unwind operation in the close case
- unset( $stack[$stackIndex--] );
- $accum =& $topAccum;
- $findEquals = false;
- $findPipe = false;
-
- // Append the result to the enclosing accumulator
- $accum .= $element;
- // Note that we do NOT increment the input pointer.
- // This is because the closing linebreak could be the opening linebreak of
- // another heading. Infinite loops are avoided because the next iteration MUST
- // hit the heading open case above, which unconditionally increments the
- // input pointer.
- }
-
- elseif ( $found == 'open' ) {
- # count opening brace characters
- $count = strspn( $text, $curChar, $i );
-
- # we need to add to stack only if opening brace count is enough for one of the rules
- if ( $count >= $rule['min'] ) {
- # Add it to the stack
- $piece = array(
- 'open' => $curChar,
- 'close' => $rule['end'],
- 'count' => $count,
- 'parts' => array( '' ),
- 'eqpos' => array(),
- 'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
- );
-
- $stackIndex ++;
- $stack[$stackIndex] = $piece;
- $accum =& $stack[$stackIndex]['parts'][0];
- $findEquals = false;
- $findPipe = true;
- } else {
- # Add literal brace(s)
- $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
- }
- $i += $count;
- }
-
- elseif ( $found == 'close' ) {
- $piece = $stack[$stackIndex];
- # lets check if there are enough characters for closing brace
- $maxCount = $piece['count'];
- $count = strspn( $text, $curChar, $i, $maxCount );
-
- # check for maximum matching characters (if there are 5 closing
- # characters, we will probably need only 3 - depending on the rules)
- $matchingCount = 0;
- $rule = $rules[$piece['open']];
- if ( $count > $rule['max'] ) {
- # The specified maximum exists in the callback array, unless the caller
- # has made an error
- $matchingCount = $rule['max'];
- } else {
- # Count is less than the maximum
- # Skip any gaps in the callback array to find the true largest match
- # Need to use array_key_exists not isset because the callback can be null
- $matchingCount = $count;
- while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
- --$matchingCount;
- }
- }
-
- if ($matchingCount <= 0) {
- # No matching element found in callback array
- # Output a literal closing brace and continue
- $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
- $i += $count;
- continue;
- }
- $name = $rule['names'][$matchingCount];
- if ( $name === null ) {
- // No element, just literal text
- $element = str_repeat( $piece['open'], $matchingCount ) .
- implode( '|', $piece['parts'] ) .
- str_repeat( $rule['end'], $matchingCount );
- } else {
- # Create XML element
- # Note: $parts is already XML, does not need to be encoded further
- $parts = $piece['parts'];
- $title = $parts[0];
- unset( $parts[0] );
-
- # The invocation is at the start of the line if lineStart is set in
- # the stack, and all opening brackets are used up.
- if ( $maxCount == $matchingCount && !empty( $piece['lineStart'] ) ) {
- $attr = ' lineStart="1"';
- } else {
- $attr = '';
- }
-
- $element = "<$name$attr>";
- $element .= "<title>$title</title>";
- $argIndex = 1;
- foreach ( $parts as $partIndex => $part ) {
- if ( isset( $piece['eqpos'][$partIndex] ) ) {
- $eqpos = $piece['eqpos'][$partIndex];
- $argName = substr( $part, 0, $eqpos );
- $argValue = substr( $part, $eqpos + 1 );
- $element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
- } else {
- $element .= "<part><name index=\"$argIndex\" /><value>$part</value></part>";
- $argIndex++;
- }
- }
- $element .= "</$name>";
- }
-
- # Advance input pointer
- $i += $matchingCount;
-
- # Unwind the stack
- unset( $stack[$stackIndex--] );
- if ( $stackIndex == -1 ) {
- $accum =& $topAccum;
- $findEquals = false;
- $findPipe = false;
- } else {
- $partCount = count( $stack[$stackIndex]['parts'] );
- $accum =& $stack[$stackIndex]['parts'][$partCount - 1];
- $findPipe = $stack[$stackIndex]['open'] != "\n";
- $findEquals = $findPipe && $partCount > 1
- && !isset( $stack[$stackIndex]['eqpos'][$partCount - 1] );
- }
-
- # Re-add the old stack element if it still has unmatched opening characters remaining
- if ($matchingCount < $piece['count']) {
- $piece['parts'] = array( '' );
- $piece['count'] -= $matchingCount;
- $piece['eqpos'] = array();
- # do we still qualify for any callback with remaining count?
- $names = $rules[$piece['open']]['names'];
- $skippedBraces = 0;
- $enclosingAccum =& $accum;
- while ( $piece['count'] ) {
- if ( array_key_exists( $piece['count'], $names ) ) {
- $stackIndex++;
- $stack[$stackIndex] = $piece;
- $accum =& $stack[$stackIndex]['parts'][0];
- $findEquals = true;
- $findPipe = true;
- break;
- }
- --$piece['count'];
- $skippedBraces ++;
- }
- $enclosingAccum .= str_repeat( $piece['open'], $skippedBraces );
- }
-
- # Add XML element to the enclosing accumulator
- $accum .= $element;
- }
-
- elseif ( $found == 'pipe' ) {
- $stack[$stackIndex]['parts'][] = '';
- $partsCount = count( $stack[$stackIndex]['parts'] );
- $accum =& $stack[$stackIndex]['parts'][$partsCount - 1];
- $findEquals = true;
- ++$i;
- }
-
- elseif ( $found == 'equals' ) {
- $findEquals = false;
- $partsCount = count( $stack[$stackIndex]['parts'] );
- $stack[$stackIndex]['eqpos'][$partsCount - 1] = strlen( $accum );
- $accum .= '=';
- ++$i;
- }
- }
-
- # Output any remaining unclosed brackets
- foreach ( $stack as $piece ) {
- if ( $piece['open'] == "\n" ) {
- $topAccum .= $piece['parts'][0];
- } else {
- $topAccum .= str_repeat( $piece['open'], $piece['count'] ) . implode( '|', $piece['parts'] );
- }
- }
- $topAccum .= '</root>';
-
- wfProfileOut( __METHOD__.'-makexml' );
- wfProfileIn( __METHOD__.'-loadXML' );
- $dom = new DOMDocument;
- wfSuppressWarnings();
- $result = $dom->loadXML( $topAccum );
- wfRestoreWarnings();
- if ( !$result ) {
- // Try running the XML through UtfNormal to get rid of invalid characters
- $topAccum = UtfNormal::cleanUp( $topAccum );
- $result = $dom->loadXML( $topAccum );
- if ( !$result ) {
- throw new MWException( __METHOD__.' generated invalid XML' );
- }
- }
- wfProfileOut( __METHOD__.'-loadXML' );
- wfProfileOut( __METHOD__ );