Merge "Make DBAccessBase use DBConnRef, rename $wiki, and hide getLoadBalancer()"
[lhc/web/wiklou.git] / includes / parser / Preprocessor_Hash.php
1 <?php
2 /**
3 * Preprocessor using PHP arrays
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 /**
25 * Differences from DOM schema:
26 * * attribute nodes are children
27 * * "<h>" nodes that aren't at the top are replaced with <possible-h>
28 *
29 * Nodes are stored in a recursive array data structure. A node store is an
30 * array where each element may be either a scalar (representing a text node)
31 * or a "descriptor", which is a two-element array where the first element is
32 * the node name and the second element is the node store for the children.
33 *
34 * Attributes are represented as children that have a node name starting with
35 * "@", and a single text node child.
36 *
37 * @todo: Consider replacing descriptor arrays with objects of a new class.
38 * Benchmark and measure resulting memory impact.
39 *
40 * @ingroup Parser
41 */
42 // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
43 class Preprocessor_Hash extends Preprocessor {
44 const CACHE_PREFIX = 'preprocess-hash';
45 const CACHE_VERSION = 2;
46
/**
 * Create a preprocessor bound to the given parser.
 *
 * The parser is stored on the instance and is later consulted for the
 * list of strip tags while building the document tree.
 *
 * @param Parser $parser Parser this preprocessor works for
 */
public function __construct( $parser ) {
	$this->parser = $parser;
}
53
/**
 * Create a new top-level expansion frame attached to this preprocessor.
 *
 * @return PPFrame_Hash
 */
public function newFrame() {
	$frame = new PPFrame_Hash( $this );
	return $frame;
}
60
/**
 * Create a custom frame attached to this preprocessor, seeded with the
 * given arguments.
 *
 * @param array $args Arguments handed to the PPCustomFrame_Hash constructor
 * @return PPCustomFrame_Hash
 */
public function newCustomFrame( $args ) {
	$frame = new PPCustomFrame_Hash( $this, $args );
	return $frame;
}
68
/**
 * Build a node array of "part" nodes from a PHP array of values.
 *
 * Integer keys produce a part whose name holds an "@index" attribute with
 * the key; any other key produces a named part (name, '=', value). Keys and
 * values are converted to strings with strval(), except for the integer
 * index which is stored as-is.
 *
 * @param array $values Map of argument name or index => argument value
 * @return PPNode_Hash_Array
 */
public function newPartNodeArray( $values ) {
	$nodes = [];

	foreach ( $values as $key => $value ) {
		$valueNode = [ 'value', [ strval( $value ) ] ];

		// Numbered arguments carry an @index attribute; named ones carry the
		// literal name followed by an equals sign.
		$partChildren = is_int( $key )
			? [ [ 'name', [ [ '@index', [ $key ] ] ] ], $valueNode ]
			: [ [ 'name', [ strval( $key ) ] ], '=', $valueNode ];

		$nodes[] = new PPNode_Hash_Tree( [ [ 'part', $partChildren ] ], 0 );
	}

	return new PPNode_Hash_Array( $nodes );
}
96
/**
 * Preprocess some wikitext and return the document tree.
 *
 * The tree is first looked up via cacheGetTree(); if a cached JSON string
 * is returned and decodes to an array, it is used directly. Otherwise the
 * text is scanned once, left to right, and the resulting node store is
 * JSON-encoded and handed to cacheSetTree() before being returned.
 *
 * @param string $text The text to parse
 * @param int $flags Bitwise combination of:
 *    Parser::PTD_FOR_INCLUSION    Handle "<noinclude>" and "<includeonly>" as if the text is being
 *                                 included. Default is to assume a direct page view.
 *
 * The generated DOM tree must depend only on the input text and the flags.
 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
 *
 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 * change in the DOM tree for a given text, must be passed through the section identifier
 * in the section edit link and thus back to extractSections().
 *
 * @throws MWException
 * @return PPNode_Hash_Tree
 */
public function preprocessToObj( $text, $flags = 0 ) {
	global $wgDisableLangConversion;

	$tree = $this->cacheGetTree( $text, $flags );
	if ( $tree !== false ) {
		$store = json_decode( $tree );
		if ( is_array( $store ) ) {
			return new PPNode_Hash_Tree( $store, 0 );
		}
	}

	$forInclusion = $flags & Parser::PTD_FOR_INCLUSION;

	$xmlishElements = $this->parser->getStripList();
	$xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
	$enableOnlyinclude = false;
	if ( $forInclusion ) {
		$ignoredTags = [ 'includeonly', '/includeonly' ];
		$ignoredElements = [ 'noinclude' ];
		$xmlishElements[] = 'noinclude';
		if ( strpos( $text, '<onlyinclude>' ) !== false
			&& strpos( $text, '</onlyinclude>' ) !== false
		) {
			$enableOnlyinclude = true;
		}
	} else {
		$ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
		$ignoredElements = [ 'includeonly' ];
		$xmlishElements[] = 'includeonly';
	}
	$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );

	// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
	$elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";

	$stack = new PPDStack_Hash;

	$searchBase = "[{<\n";
	if ( !$wgDisableLangConversion ) {
		$searchBase .= '-';
	}

	// For fast reverse searches
	$revText = strrev( $text );
	$lengthText = strlen( $text );

	// Input pointer, starts out pointing to a pseudo-newline before the start
	$i = 0;
	// Current accumulator. See the doc comment for Preprocessor_Hash for the format.
	$accum =& $stack->getAccum();
	// True to find equals signs in arguments
	$findEquals = false;
	// True to take notice of pipe characters
	$findPipe = false;
	$headingIndex = 1;
	// True if $i is inside a possible heading
	$inHeading = false;
	// True if there are no more greater-than (>) signs right of $i
	$noMoreGT = false;
	// Map of lower-cased tag name => true if there are no more closing tags of
	// given type right of $i
	$noMoreClosingTag = [];
	// True to ignore all input up to the next <onlyinclude>
	$findOnlyinclude = $enableOnlyinclude;
	// Do a line-start run without outputting an LF character
	$fakeLineStart = true;

	while ( true ) {
		// $this->memCheck();

		if ( $findOnlyinclude ) {
			// Ignore all input up to the next <onlyinclude>
			$startPos = strpos( $text, '<onlyinclude>', $i );
			if ( $startPos === false ) {
				// Ignored section runs to the end
				$accum[] = [ 'ignore', [ substr( $text, $i ) ] ];
				break;
			}
			$tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
			$accum[] = [ 'ignore', [ substr( $text, $i, $tagEndPos - $i ) ] ];
			$i = $tagEndPos;
			$findOnlyinclude = false;
		}

		if ( $fakeLineStart ) {
			$found = 'line-start';
			$curChar = '';
		} else {
			# Find next opening brace, closing brace or pipe
			$search = $searchBase;
			if ( $stack->top === false ) {
				$currentClosing = '';
			} else {
				$currentClosing = $stack->top->close;
				$search .= $currentClosing;
			}
			if ( $findPipe ) {
				$search .= '|';
			}
			if ( $findEquals ) {
				// First equals will be for the template
				$search .= '=';
			}
			$rule = null;
			# Output literal section, advance input counter
			$literalLength = strcspn( $text, $search, $i );
			if ( $literalLength > 0 ) {
				self::addLiteral( $accum, substr( $text, $i, $literalLength ) );
				$i += $literalLength;
			}
			if ( $i >= $lengthText ) {
				if ( $currentClosing == "\n" ) {
					// Do a past-the-end run to finish off the heading
					$curChar = '';
					$found = 'line-end';
				} else {
					# All done
					break;
				}
			} else {
				$curChar = $curTwoChar = $text[$i];
				if ( ( $i + 1 ) < $lengthText ) {
					$curTwoChar .= $text[$i + 1];
				}
				if ( $curChar == '|' ) {
					$found = 'pipe';
				} elseif ( $curChar == '=' ) {
					$found = 'equals';
				} elseif ( $curChar == '<' ) {
					$found = 'angle';
				} elseif ( $curChar == "\n" ) {
					if ( $inHeading ) {
						$found = 'line-end';
					} else {
						$found = 'line-start';
					}
				} elseif ( $curTwoChar == $currentClosing ) {
					$found = 'close';
					$curChar = $curTwoChar;
				} elseif ( $curChar == $currentClosing ) {
					$found = 'close';
				} elseif ( isset( $this->rules[$curTwoChar] ) ) {
					$curChar = $curTwoChar;
					$found = 'open';
					$rule = $this->rules[$curChar];
				} elseif ( isset( $this->rules[$curChar] ) ) {
					$found = 'open';
					$rule = $this->rules[$curChar];
				} else {
					# Some versions of PHP have a strcspn which stops on
					# null characters; ignore these and continue.
					# We also may get '-' and '}' characters here which
					# don't match -{ or $currentClosing.  Add these to
					# output and continue.
					if ( $curChar == '-' || $curChar == '}' ) {
						self::addLiteral( $accum, $curChar );
					}
					++$i;
					continue;
				}
			}
		}

		if ( $found == 'angle' ) {
			$matches = false;
			// Handle </onlyinclude>
			if ( $enableOnlyinclude
				&& substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>'
			) {
				$findOnlyinclude = true;
				continue;
			}

			// Determine element name
			if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
				// Element name missing or not listed
				self::addLiteral( $accum, '<' );
				++$i;
				continue;
			}
			// Handle comments
			if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
				// To avoid leaving blank lines, when a sequence of
				// space-separated comments is both preceded and followed by
				// a newline (ignoring spaces), then
				// trim leading and trailing spaces and the trailing newline.

				// Find the end
				$endPos = strpos( $text, '-->', $i + 4 );
				if ( $endPos === false ) {
					// Unclosed comment in input, runs to end
					$inner = substr( $text, $i );
					$accum[] = [ 'comment', [ $inner ] ];
					$i = $lengthText;
				} else {
					// Search backwards for leading whitespace
					$wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0;

					// Search forwards for trailing whitespace
					// $wsEnd will be the position of the last space (or the '>' if there's none)
					$wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );

					// Keep looking forward as long as we're finding more
					// comments.
					$comments = [ [ $wsStart, $wsEnd ] ];
					while ( substr( $text, $wsEnd + 1, 4 ) == '<!--' ) {
						$c = strpos( $text, '-->', $wsEnd + 4 );
						if ( $c === false ) {
							break;
						}
						$c = $c + 2 + strspn( $text, " \t", $c + 3 );
						$comments[] = [ $wsEnd + 1, $c ];
						$wsEnd = $c;
					}

					// Eat the line if possible
					// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
					// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
					// it's a possible beneficial b/c break.
					if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
						&& substr( $text, $wsEnd + 1, 1 ) == "\n"
					) {
						// Remove leading whitespace from the end of the accumulator
						$wsLength = $i - $wsStart;
						$endIndex = count( $accum ) - 1;

						// Sanity check
						if ( $wsLength > 0
							&& $endIndex >= 0
							&& is_string( $accum[$endIndex] )
							&& strspn( $accum[$endIndex], " \t", -$wsLength ) === $wsLength
						) {
							$accum[$endIndex] = substr( $accum[$endIndex], 0, -$wsLength );
						}

						// Dump all but the last comment to the accumulator
						foreach ( $comments as $j => $com ) {
							$startPos = $com[0];
							$endPos = $com[1] + 1;
							if ( $j == ( count( $comments ) - 1 ) ) {
								break;
							}
							$inner = substr( $text, $startPos, $endPos - $startPos );
							$accum[] = [ 'comment', [ $inner ] ];
						}

						// Do a line-start run next time to look for headings after the comment
						$fakeLineStart = true;
					} else {
						// No line to eat, just take the comment itself
						$startPos = $i;
						$endPos += 2;
					}

					if ( $stack->top ) {
						$part = $stack->top->getCurrentPart();
						if ( !( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) ) {
							$part->visualEnd = $wsStart;
						}
						// Else comments abutting, no change in visual end
						$part->commentEnd = $endPos;
					}
					$i = $endPos + 1;
					$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
					$accum[] = [ 'comment', [ $inner ] ];
				}
				continue;
			}
			$name = $matches[1];
			$lowerName = strtolower( $name );
			$attrStart = $i + strlen( $name ) + 1;

			// Find end of tag
			$tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
			if ( $tagEndPos === false ) {
				// Infinite backtrack
				// Disable tag search to prevent worst-case O(N^2) performance
				$noMoreGT = true;
				self::addLiteral( $accum, '<' );
				++$i;
				continue;
			}

			// Handle ignored tags
			if ( in_array( $lowerName, $ignoredTags ) ) {
				$accum[] = [ 'ignore', [ substr( $text, $i, $tagEndPos - $i + 1 ) ] ];
				$i = $tagEndPos + 1;
				continue;
			}

			$tagStartPos = $i;
			if ( $text[$tagEndPos - 1] == '/' ) {
				// Short end tag
				$attrEnd = $tagEndPos - 1;
				$inner = null;
				$i = $tagEndPos + 1;
				$close = null;
			} else {
				$attrEnd = $tagEndPos;
				// Find closing tag.
				// The negative-result cache is keyed by the lower-cased name: the
				// search below is case-insensitive, so one failed search rules out
				// every case variant of the tag from here onwards (the input
				// pointer only ever moves forward).
				if (
					!isset( $noMoreClosingTag[$lowerName] ) &&
					preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
						$text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 )
				) {
					$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
					$i = $matches[0][1] + strlen( $matches[0][0] );
					$close = $matches[0][0];
				} else {
					// No end tag
					if ( in_array( $name, $xmlishAllowMissingEndTag ) ) {
						// Let it run out to the end of the text.
						$inner = substr( $text, $tagEndPos + 1 );
						$i = $lengthText;
						$close = null;
					} else {
						// Don't match the tag, treat opening tag as literal and resume parsing.
						$i = $tagEndPos + 1;
						self::addLiteral( $accum,
							substr( $text, $tagStartPos, $tagEndPos + 1 - $tagStartPos ) );
						// Cache results, otherwise we have O(N^2) performance for input like
						// <foo><foo><foo>... (or <foo><Foo><fOo>... hence the lower-cased key)
						$noMoreClosingTag[$lowerName] = true;
						continue;
					}
				}
			}
			// <includeonly> and <noinclude> just become <ignore> tags
			if ( in_array( $lowerName, $ignoredElements ) ) {
				$accum[] = [ 'ignore', [ substr( $text, $tagStartPos, $i - $tagStartPos ) ] ];
				continue;
			}

			if ( $attrEnd <= $attrStart ) {
				$attr = '';
			} else {
				// Note that the attr element contains the whitespace between name and attribute,
				// this is necessary for precise reconstruction during pre-save transform.
				$attr = substr( $text, $attrStart, $attrEnd - $attrStart );
			}

			$children = [
				[ 'name', [ $name ] ],
				[ 'attr', [ $attr ] ] ];
			if ( $inner !== null ) {
				$children[] = [ 'inner', [ $inner ] ];
			}
			if ( $close !== null ) {
				$children[] = [ 'close', [ $close ] ];
			}
			$accum[] = [ 'ext', $children ];
		} elseif ( $found == 'line-start' ) {
			// Is this the start of a heading?
			// Line break belongs before the heading element in any case
			if ( $fakeLineStart ) {
				$fakeLineStart = false;
			} else {
				self::addLiteral( $accum, $curChar );
				$i++;
			}

			$count = strspn( $text, '=', $i, 6 );
			if ( $count == 1 && $findEquals ) {
				// DWIM: This looks kind of like a name/value separator.
				// Let's let the equals handler have it and break the potential
				// heading. This is heuristic, but AFAICT the methods for
				// completely correct disambiguation are very complex.
			} elseif ( $count > 0 ) {
				$piece = [
					'open' => "\n",
					'close' => "\n",
					'parts' => [ new PPDPart_Hash( str_repeat( '=', $count ) ) ],
					'startPos' => $i,
					'count' => $count ];
				$stack->push( $piece );
				$accum =& $stack->getAccum();
				$stackFlags = $stack->getFlags();
				if ( isset( $stackFlags['findEquals'] ) ) {
					$findEquals = $stackFlags['findEquals'];
				}
				if ( isset( $stackFlags['findPipe'] ) ) {
					$findPipe = $stackFlags['findPipe'];
				}
				if ( isset( $stackFlags['inHeading'] ) ) {
					$inHeading = $stackFlags['inHeading'];
				}
				$i += $count;
			}
		} elseif ( $found == 'line-end' ) {
			$piece = $stack->top;
			// A heading must be open, otherwise \n wouldn't have been in the search list
			// FIXME: Don't use assert()
			// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.assert
			assert( $piece->open === "\n" );
			$part = $piece->getCurrentPart();
			// Search back through the input to see if it has a proper close.
			// Do this using the reversed string since the other solutions
			// (end anchor, etc.) are inefficient.
			$wsLength = strspn( $revText, " \t", $lengthText - $i );
			$searchStart = $i - $wsLength;
			if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
				// Comment found at line end
				// Search for equals signs before the comment
				$searchStart = $part->visualEnd;
				$searchStart -= strspn( $revText, " \t", $lengthText - $searchStart );
			}
			$count = $piece->count;
			$equalsLength = strspn( $revText, '=', $lengthText - $searchStart );
			if ( $equalsLength > 0 ) {
				if ( $searchStart - $equalsLength == $piece->startPos ) {
					// This is just a single string of equals signs on its own line
					// Replicate the doHeadings behavior /={count}(.+)={count}/
					// First find out how many equals signs there really are (don't stop at 6)
					$count = $equalsLength;
					if ( $count < 3 ) {
						$count = 0;
					} else {
						$count = min( 6, intval( ( $count - 1 ) / 2 ) );
					}
				} else {
					$count = min( $equalsLength, $count );
				}
				if ( $count > 0 ) {
					// Normal match, output <h>
					$element = [ [ 'possible-h',
						array_merge(
							[
								[ '@level', [ $count ] ],
								[ '@i', [ $headingIndex++ ] ]
							],
							$accum
						)
					] ];
				} else {
					// Single equals sign on its own line, count=0
					$element = $accum;
				}
			} else {
				// No match, no <h>, just pass down the inner text
				$element = $accum;
			}
			// Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();
			$stackFlags = $stack->getFlags();
			if ( isset( $stackFlags['findEquals'] ) ) {
				$findEquals = $stackFlags['findEquals'];
			}
			if ( isset( $stackFlags['findPipe'] ) ) {
				$findPipe = $stackFlags['findPipe'];
			}
			if ( isset( $stackFlags['inHeading'] ) ) {
				$inHeading = $stackFlags['inHeading'];
			}

			// Append the result to the enclosing accumulator
			array_splice( $accum, count( $accum ), 0, $element );

			// Note that we do NOT increment the input pointer.
			// This is because the closing linebreak could be the opening linebreak of
			// another heading. Infinite loops are avoided because the next iteration MUST
			// hit the heading open case above, which unconditionally increments the
			// input pointer.
		} elseif ( $found == 'open' ) {
			# count opening brace characters
			$curLen = strlen( $curChar );
			$count = ( $curLen > 1 ) ?
				# allow the final character to repeat
				strspn( $text, $curChar[$curLen - 1], $i + 1 ) + 1 :
				strspn( $text, $curChar, $i );

			$savedPrefix = '';
			$lineStart = ( $i > 0 && $text[$i - 1] == "\n" );

			if ( $curChar === "-{" && $count > $curLen ) {
				// -{ => {{ transition because rightmost wins
				$savedPrefix = '-';
				$i++;
				$curChar = '{';
				$count--;
				$rule = $this->rules[$curChar];
			}

			# we need to add to stack only if opening brace count is enough for one of the rules
			if ( $count >= $rule['min'] ) {
				# Add it to the stack
				$piece = [
					'open' => $curChar,
					'close' => $rule['end'],
					'savedPrefix' => $savedPrefix,
					'count' => $count,
					'lineStart' => $lineStart,
				];

				$stack->push( $piece );
				$accum =& $stack->getAccum();
				$stackFlags = $stack->getFlags();
				if ( isset( $stackFlags['findEquals'] ) ) {
					$findEquals = $stackFlags['findEquals'];
				}
				if ( isset( $stackFlags['findPipe'] ) ) {
					$findPipe = $stackFlags['findPipe'];
				}
				if ( isset( $stackFlags['inHeading'] ) ) {
					$inHeading = $stackFlags['inHeading'];
				}
			} else {
				# Add literal brace(s)
				self::addLiteral( $accum, $savedPrefix . str_repeat( $curChar, $count ) );
			}
			$i += $count;
		} elseif ( $found == 'close' ) {
			/** @var PPDStackElement_Hash $piece */
			$piece = $stack->top;
			'@phan-var PPDStackElement_Hash $piece';
			# lets check if there are enough characters for closing brace
			$maxCount = $piece->count;
			if ( $piece->close === '}-' && $curChar === '}' ) {
				$maxCount--; # don't try to match closing '-' as a '}'
			}
			$curLen = strlen( $curChar );
			$count = ( $curLen > 1 ) ? $curLen :
				strspn( $text, $curChar, $i, $maxCount );

			# check for maximum matching characters (if there are 5 closing
			# characters, we will probably need only 3 - depending on the rules)
			$rule = $this->rules[$piece->open];
			if ( $count > $rule['max'] ) {
				# The specified maximum exists in the callback array, unless the caller
				# has made an error
				$matchingCount = $rule['max'];
			} else {
				# Count is less than the maximum
				# Skip any gaps in the callback array to find the true largest match
				# Need to use array_key_exists not isset because the callback can be null
				$matchingCount = $count;
				while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
					--$matchingCount;
				}
			}

			if ( $matchingCount <= 0 ) {
				# No matching element found in callback array
				# Output a literal closing brace and continue
				$endText = substr( $text, $i, $count );
				self::addLiteral( $accum, $endText );
				$i += $count;
				continue;
			}
			$name = $rule['names'][$matchingCount];
			if ( $name === null ) {
				// No element, just literal text
				$endText = substr( $text, $i, $matchingCount );
				$element = $piece->breakSyntax( $matchingCount );
				self::addLiteral( $element, $endText );
			} else {
				# Create XML element
				$parts = $piece->parts;
				$titleAccum = $parts[0]->out;
				unset( $parts[0] );

				$children = [];

				# The invocation is at the start of the line if lineStart is set in
				# the stack, and all opening brackets are used up.
				if ( $maxCount == $matchingCount &&
						!empty( $piece->lineStart ) &&
						strlen( $piece->savedPrefix ) == 0 ) {
					$children[] = [ '@lineStart', [ 1 ] ];
				}
				$titleNode = [ 'title', $titleAccum ];
				$children[] = $titleNode;
				$argIndex = 1;
				foreach ( $parts as $part ) {
					if ( isset( $part->eqpos ) ) {
						$equalsNode = $part->out[$part->eqpos];
						$nameNode = [ 'name', array_slice( $part->out, 0, $part->eqpos ) ];
						$valueNode = [ 'value', array_slice( $part->out, $part->eqpos + 1 ) ];
						$partNode = [ 'part', [ $nameNode, $equalsNode, $valueNode ] ];
						$children[] = $partNode;
					} else {
						$nameNode = [ 'name', [ [ '@index', [ $argIndex++ ] ] ] ];
						$valueNode = [ 'value', $part->out ];
						$partNode = [ 'part', [ $nameNode, $valueNode ] ];
						$children[] = $partNode;
					}
				}
				$element = [ [ $name, $children ] ];
			}

			# Advance input pointer
			$i += $matchingCount;

			# Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();

			# Re-add the old stack element if it still has unmatched opening characters remaining
			if ( $matchingCount < $piece->count ) {
				$piece->parts = [ new PPDPart_Hash ];
				$piece->count -= $matchingCount;
				# do we still qualify for any callback with remaining count?
				$min = $this->rules[$piece->open]['min'];
				if ( $piece->count >= $min ) {
					$stack->push( $piece );
					$accum =& $stack->getAccum();
				} elseif ( $piece->count == 1 && $piece->open === '{' && $piece->savedPrefix === '-' ) {
					$piece->savedPrefix = '';
					$piece->open = '-{';
					$piece->count = 2;
					$piece->close = $this->rules[$piece->open]['end'];
					$stack->push( $piece );
					$accum =& $stack->getAccum();
				} else {
					$s = substr( $piece->open, 0, -1 );
					$s .= str_repeat(
						substr( $piece->open, -1 ),
						$piece->count - strlen( $s )
					);
					self::addLiteral( $accum, $piece->savedPrefix . $s );
				}
			} elseif ( $piece->savedPrefix !== '' ) {
				self::addLiteral( $accum, $piece->savedPrefix );
			}

			$stackFlags = $stack->getFlags();
			if ( isset( $stackFlags['findEquals'] ) ) {
				$findEquals = $stackFlags['findEquals'];
			}
			if ( isset( $stackFlags['findPipe'] ) ) {
				$findPipe = $stackFlags['findPipe'];
			}
			if ( isset( $stackFlags['inHeading'] ) ) {
				$inHeading = $stackFlags['inHeading'];
			}

			# Add XML element to the enclosing accumulator
			array_splice( $accum, count( $accum ), 0, $element );
		} elseif ( $found == 'pipe' ) {
			$findEquals = true; // shortcut for getFlags()
			$stack->addPart();
			$accum =& $stack->getAccum();
			++$i;
		} elseif ( $found == 'equals' ) {
			$findEquals = false; // shortcut for getFlags()
			$accum[] = [ 'equals', [ '=' ] ];
			$stack->getCurrentPart()->eqpos = count( $accum ) - 1;
			++$i;
		}
	}

	# Output any remaining unclosed brackets
	foreach ( $stack->stack as $piece ) {
		array_splice( $stack->rootAccum, count( $stack->rootAccum ), 0, $piece->breakSyntax() );
	}

	# Enable top-level headings
	foreach ( $stack->rootAccum as &$node ) {
		if ( is_array( $node ) && $node[PPNode_Hash_Tree::NAME] === 'possible-h' ) {
			$node[PPNode_Hash_Tree::NAME] = 'h';
		}
	}
	// Break the by-reference binding so the last root element is not left
	// aliased to $node when the accumulator is copied into the root store.
	unset( $node );

	$rootStore = [ [ 'root', $stack->rootAccum ] ];
	$rootNode = new PPNode_Hash_Tree( $rootStore, 0 );

	// Cache
	$tree = json_encode( $rootStore, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
	if ( $tree !== false ) {
		$this->cacheSetTree( $text, $flags, $tree );
	}

	return $rootNode;
}
787
/**
 * Append literal text to an accumulator.
 *
 * When the last element of the accumulator is already a string, the text
 * is concatenated onto it so that adjacent literals stay merged in a single
 * text node; otherwise the text is pushed as a new element.
 *
 * @param array &$accum Node store to append to, modified in place
 * @param string $text Literal text to add
 */
private static function addLiteral( array &$accum, $text ) {
	$lastIndex = count( $accum ) - 1;

	// Empty store, or the trailing element is a descriptor rather than text
	if ( $lastIndex < 0 || !is_string( $accum[$lastIndex] ) ) {
		$accum[] = $text;
		return;
	}

	$accum[$lastIndex] .= $text;
}
796 }