Merge "Disable warning about direct text table access for now"
[lhc/web/wiklou.git] / includes / tidy / RemexCompatMunger.php
1 <?php
2
3 namespace MediaWiki\Tidy;
4
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\Serializer;
7 use RemexHtml\Serializer\SerializerNode;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10 use RemexHtml\TreeBuilder\TreeBuilder;
11 use RemexHtml\TreeBuilder\TreeHandler;
12 use RemexHtml\TreeBuilder\Element;
13
14 /**
15 * @internal
16 */
17 class RemexCompatMunger implements TreeHandler {
/**
 * Element names which insertElement() classifies as inline. Any element
 * whose HTML name is not a key of this array is handled as a block.
 * The values are unused; only the presence of the key is tested.
 *
 * @var array
 */
private static $onlyInlineElements = [
	'a' => true,
	'abbr' => true,
	'acronym' => true,
	'applet' => true,
	'b' => true,
	'basefont' => true,
	'bdo' => true,
	'big' => true,
	'br' => true,
	'button' => true,
	'cite' => true,
	'code' => true,
	'del' => true,
	'dfn' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'iframe' => true,
	'img' => true,
	'input' => true,
	'ins' => true,
	'kbd' => true,
	'label' => true,
	'legend' => true,
	'map' => true,
	'object' => true,
	'param' => true,
	'q' => true,
	'rb' => true,
	'rbc' => true,
	'rp' => true,
	'rt' => true,
	'rtc' => true,
	'ruby' => true,
	's' => true,
	'samp' => true,
	'select' => true,
	'small' => true,
	'span' => true,
	'strike' => true,
	'strong' => true,
	'sub' => true,
	'sup' => true,
	'textarea' => true,
	'tt' => true,
	'u' => true,
	'var' => true,
	// Those defined in tidy.conf
	'video' => true,
	'audio' => true,
	'bdi' => true,
	'data' => true,
	'time' => true,
	'mark' => true,
];

/**
 * Formatting element names. insertElement() marks such an element as
 * splittable when it is inserted under a p-wrapper or under another
 * splittable element. Only the presence of the key is tested.
 *
 * @var array
 */
private static $formattingElements = [
	'a' => true,
	'b' => true,
	'big' => true,
	'code' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'nobr' => true,
	's' => true,
	'small' => true,
	'strike' => true,
	'strong' => true,
	'tt' => true,
	'u' => true,
];
91
/**
 * The downstream handler to which all (possibly rewritten) tree events
 * are forwarded.
 *
 * @var Serializer
 */
private $serializer;

/**
 * Whether trace() should emit debug messages.
 *
 * @var bool
 */
private $trace;

/**
 * @param Serializer $serializer The serializer which receives the munged events
 * @param bool $trace Set to true to log the insertion decisions via trace()
 */
public function __construct( Serializer $serializer, $trace = false ) {
	$this->trace = $trace;
	$this->serializer = $serializer;
}
106
/**
 * Start the document in the serializer, then attach munger data to the
 * root node and flag it as needing p-wrapping.
 *
 * @param string|null $fragmentNamespace Passed through to the serializer
 * @param string|null $fragmentName Passed through to the serializer
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
	$this->serializer->startDocument( $fragmentNamespace, $fragmentName );

	$rootData = new RemexMungerData;
	$rootData->needsPWrapping = true;
	$this->serializer->getRootNode()->snData = $rootData;
}
113
/**
 * Pass the end-of-document event through to the serializer unchanged.
 *
 * @param int $pos Passed through to the serializer
 */
public function endDocument( $pos ) {
	$this->serializer->endDocument(
		$pos
	);
}
117
/**
 * Work out the serializer node which will be the parent of an insertion,
 * and the reference node to hand to the serializer for that insertion.
 *
 * - ROOT: the parent is the serializer's root node, there is no reference node.
 * - BEFORE: the reference node is the node of $refElement, and the parent
 *   is that node's parent.
 * - Otherwise (UNDER): the parent and the reference node are the same
 *   node. If the requested element has a current clone, the chain of
 *   clone links is followed to its end. Otherwise, if the requested
 *   element has an open child p-wrapper, the p-wrapper is used.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|SerializerNode|null $refElement
 * @return array A two-element array: [ parent node, reference node ]
 */
private function getParentForInsert( $preposition, $refElement ) {
	if ( $preposition === TreeBuilder::ROOT ) {
		return [ $this->serializer->getRootNode(), null ];
	}

	$target = $refElement->userData;
	if ( $preposition === TreeBuilder::BEFORE ) {
		return [ $this->serializer->getParentNode( $target ), $target ];
	}

	$targetData = $target->snData;
	if ( $targetData->currentCloneElement ) {
		// Walk to the end of the chain of clone links
		$requestedData = $targetData;
		do {
			$refElement = $targetData->currentCloneElement;
			$target = $refElement->userData;
			$targetData = $target->snData;
		} while ( $targetData->currentCloneElement );
		// Remember the end of the chain on the element we started from,
		// so that the next lookup is a single hop
		$requestedData->currentCloneElement = $refElement;
	} elseif ( $targetData->childPElement ) {
		// Divert the insertion into the open p-wrapper
		$target = $targetData->childPElement->userData;
	}

	return [ $target, $target ];
}
144
/**
 * Create an mw:p-wrap element under the given node and register it as
 * that node's open child p-wrapper.
 *
 * @param SerializerNode $parent The node which becomes the wrap base
 * @param int $sourceStart The source position passed to the serializer
 * @return SerializerNode The serializer node of the new p-wrapper
 */
private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
	$wrapperElement = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
	// Inserting downstream is what populates $wrapperElement->userData
	$this->serializer->insertElement(
		TreeBuilder::UNDER, $parent, $wrapperElement, false, $sourceStart, 0
	);
	$wrapperNode = $wrapperElement->userData;

	$wrapperData = new RemexMungerData;
	$wrapperData->isPWrapper = true;
	$wrapperData->wrapBaseNode = $parent;
	$wrapperNode->snData = $wrapperData;

	// Subsequent insertions under $parent will be diverted to the wrapper
	$parent->snData->childPElement = $wrapperElement;

	return $wrapperNode;
}
163
/**
 * Insert text. Text which is not entirely whitespace is wrapped in a
 * p-wrapper when it is placed directly under a node which needs
 * p-wrapping. Text of any kind placed under a splittable node with no
 * ancestor p-wrapper causes the tag stack to be split.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|SerializerNode|null $refElement
 * @param string $text The string containing the text
 * @param int $start The offset of the text within $text
 * @param int $length The length of the text within $text
 * @param int $sourceStart Passed through to the serializer
 * @param int $sourceLength Passed through to the serializer
 */
public function characters( $preposition, $refElement, $text, $start, $length,
	$sourceStart, $sourceLength
) {
	$whitespaceOnly = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;

	list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;

	if ( $preposition === TreeBuilder::UNDER ) {
		$diverted = null;
		if ( $parentData->needsPWrapping && !$whitespaceOnly ) {
			// Bare text under body/blockquote gets a p-wrapper
			$diverted = $this->insertPWrapper( $refNode, $sourceStart );
		} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
			// Splittable parent in block mode: split the tag stack
			$diverted = $this->splitTagStack( $refNode, true, $sourceStart );
		}
		if ( $diverted !== null ) {
			// The text now goes under the newly created node
			$refNode = $diverted;
			$parentData = $diverted->snData;
		}
	}

	if ( !$whitespaceOnly ) {
		// Count the text as non-blank content of its parent
		$parentData->nonblankNodeCount++;
	}

	$this->serializer->characters(
		$preposition, $refNode, $text, $start, $length, $sourceStart, $sourceLength
	);
}
193
/**
 * Write a message to the debug log if tracing was enabled in the
 * constructor, otherwise do nothing.
 *
 * @param string $msg
 */
private function trace( $msg ) {
	if ( !$this->trace ) {
		return;
	}
	wfDebug( "[RCM] $msg" );
}
199
/**
 * Insert or reparent an element. Create p-wrappers or split the tag stack
 * as necessary.
 *
 * Consider the following insertion locations. The parent may be:
 *
 *   - A: A body or blockquote (!!needsPWrapping)
 *   - B: A p-wrapper (!!isPWrapper)
 *   - C: A descendant of a p-wrapper (!!ancestorPNode)
 *     - CS: With splittable formatting elements in the stack region up to
 *       the p-wrapper
 *     - CU: With one or more unsplittable elements in the stack region up
 *       to the p-wrapper
 *   - D: Not a descendant of a p-wrapper (!ancestorPNode)
 *     - DS: With splittable formatting elements in the stack region up to
 *       the body or blockquote
 *     - DU: With one or more unsplittable elements in the stack region up
 *       to the body or blockquote
 *
 * And consider that we may insert two types of element:
 *   - b: block
 *   - i: inline
 *
 * We handle the insertion as follows:
 *
 *   - A/i: Create a p-wrapper, insert under it
 *   - A/b: Insert as normal
 *   - B/i: Insert as normal
 *   - B/b: Close the p-wrapper, insert under the body/blockquote (wrap
 *     base) instead
 *   - C/i: Insert as normal
 *   - CS/b: Split the tag stack, insert the block under cloned formatting
 *     elements which have the wrap base (the parent of the p-wrap) as
 *     their ultimate parent.
 *   - CU/b: Disable the p-wrap, by reparenting the currently open child
 *     of the p-wrap under the p-wrap's parent. Then insert the block as
 *     normal.
 *   - D/b: Insert as normal
 *   - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
 *     parent of the formatting elements thus cloned. The parent of the
 *     p-wrapper is the body or blockquote.
 *   - DU/i: Insert as normal
 *
 * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
 * normal, the full algorithm is not followed.
 *
 * @param int $preposition
 * @param Element|SerializerNode|null $refElement
 * @param Element $element
 * @param bool $void
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function insertElement( $preposition, $refElement, Element $element, $void,
	$sourceStart, $sourceLength
) {
	list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;
	$elementName = $element->htmlName;

	$inline = isset( self::$onlyInlineElements[$elementName] );
	$under = $preposition === TreeBuilder::UNDER;

	if ( $under && $parentData->isPWrapper && !$inline ) {
		// [B/b] The element is non-inline and the parent is a p-wrapper,
		// close the parent and insert into its parent instead
		$this->trace( 'insert B/b' );
		$parent = $this->serializer->getParentNode( $parent );
		$parentData = $parent->snData;
		// Cancel the diversion into the p-wrapper
		$parentData->childPElement = null;
		// Undo the diversion applied by getParentForInsert()
		$newRef = $refElement->userData;
	} elseif ( $under && $parentData->isSplittable
		&& (bool)$parentData->ancestorPNode !== $inline
	) {
		// [CS/b, DS/i] The parent is splittable and the current element is
		// inline in block context, or if the current element is a block
		// under a p-wrapper, split the tag stack.
		$this->trace( $inline ? 'insert DS/i' : 'insert CS/b' );
		$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $under && $parentData->needsPWrapping && $inline ) {
		// [A/i] If the element is inline and we are in body/blockquote,
		// we need to create a p-wrapper
		$this->trace( 'insert A/i' );
		$newRef = $this->insertPWrapper( $newRef, $sourceStart );
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $parentData->ancestorPNode && !$inline ) {
		// [CU/b] If the element is non-inline and (despite attempting to
		// split above) there is still an ancestor p-wrap, disable that
		// p-wrap
		$this->trace( 'insert CU/b' );
		$this->disablePWrapper( $parent, $sourceStart );
	} else {
		// [A/b, B/i, C/i, D/b, DU/i] insert as normal
		$this->trace( 'insert normal' );
	}

	// An element with element children is a non-blank element
	$parentData->nonblankNodeCount++;

	// Insert the element downstream and so initialise its userData
	$this->serializer->insertElement( $preposition, $newRef,
		$element, $void, $sourceStart, $sourceLength );

	// Initialise snData, reusing existing data if the element already has
	// some (i.e. this call is a reparenting operation)
	if ( !$element->userData->snData ) {
		$elementData = $element->userData->snData = new RemexMungerData;
	} else {
		$elementData = $element->userData->snData;
	}
	if ( ( $parentData->isPWrapper || $parentData->isSplittable )
		&& isset( self::$formattingElements[$elementName] )
	) {
		$elementData->isSplittable = true;
	}
	if ( $parentData->isPWrapper ) {
		$elementData->ancestorPNode = $parent;
	} elseif ( $parentData->ancestorPNode ) {
		$elementData->ancestorPNode = $parentData->ancestorPNode;
	}
	if ( $parentData->wrapBaseNode ) {
		$elementData->wrapBaseNode = $parentData->wrapBaseNode;
	} elseif ( $parentData->needsPWrapping ) {
		$elementData->wrapBaseNode = $parent;
	}
	if ( $elementName === 'body'
		|| $elementName === 'blockquote'
		|| $elementName === 'html'
	) {
		$elementData->needsPWrapping = true;
	}
}
337
/**
 * Clone the chain of open nodes from $parentNode up to the end of its
 * clone range, and return the innermost clone as the new parent.
 *
 * The clone range ends at the ancestor p-wrapper of $parentNode if it has
 * one, and at its wrap base otherwise. The clones are created under the
 * wrap base, either inside a newly inserted p-wrapper ($inline is true)
 * or directly ($inline is false, in which case any p-wrap diversion on
 * the wrap base is cancelled). Original nodes in the range whose
 * nonblankNodeCount was zero are removed from the serializer afterwards.
 *
 * @param SerializerNode $parentNode The node at which to start cloning
 * @param bool $inline Whether the clones should be placed in a new p-wrapper
 * @param int $pos The source position
 * @return SerializerNode
 * @throws \Exception If a child of the root node is reached before the
 *   end of the clone range
 */
private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
	$serializer = $this->serializer;
	$startData = $parentNode->snData;
	$wrapBase = $startData->wrapBaseNode;
	$pWrap = $startData->ancestorPNode;
	$rangeEnd = $pWrap ?: $wrapBase;

	// Collect the nodes to clone, innermost first
	$root = $serializer->getRootNode();
	$chain = [];
	$emptyNodes = [];
	$cursor = $parentNode;
	while ( $cursor !== $rangeEnd ) {
		$above = $serializer->getParentNode( $cursor );
		if ( $above === $root ) {
			throw new \Exception( 'Did not find end of clone range' );
		}
		$chain[] = $cursor;
		if ( $cursor->snData->nonblankNodeCount === 0 ) {
			// This node will be removed, so its parent loses a child
			$emptyNodes[] = $cursor;
			$above->snData->nonblankNodeCount--;
		}
		$cursor = $above;
	}

	// Choose the node under which the outermost clone is inserted
	if ( $inline ) {
		$pWrap = $this->insertPWrapper( $wrapBase, $pos );
		$tip = $pWrap;
	} else {
		if ( $pWrap ) {
			// End the p-wrap which was open, cancel the diversion
			$wrapBase->snData->childPElement = null;
		}
		$pWrap = null;
		$tip = $wrapBase;
	}

	// Recreate the chain, outermost first
	foreach ( array_reverse( $chain ) as $original ) {
		$originalData = $original->snData;
		$clone = new Element( $original->namespace, $original->name, $original->attrs );
		$serializer->insertElement( TreeBuilder::UNDER, $tip, $clone, false, $pos, 0 );
		// Later insertions under the original are redirected to the clone
		$originalData->currentCloneElement = $clone;

		$cloneData = new RemexMungerData;
		if ( $pWrap ) {
			$cloneData->ancestorPNode = $pWrap;
		}
		$cloneData->isSplittable = true;
		$cloneData->wrapBaseNode = $wrapBase;
		$cloneData->isPWrapper = $originalData->isPWrapper;
		$clone->userData->snData = $cloneData;

		$tip->snData->nonblankNodeCount++;
		$tip = $clone->userData;
	}

	// Drop the originals which had no non-blank content
	foreach ( $emptyNodes as $emptyNode ) {
		// removeNode() takes an Element, so wrap the node in a stand-in
		$standIn = new Element( $emptyNode->namespace, $emptyNode->name, $emptyNode->attrs );
		$standIn->userData = $emptyNode;
		$serializer->removeNode( $standIn, $pos );
	}

	return $tip;
}
415
/**
 * Find the ancestor of $node which is a child of a p-wrapper, and
 * reparent that node so that it is placed after the end of the p-wrapper.
 *
 * Nothing is done if the p-wrapper is not the last child of its parent.
 *
 * @param SerializerNode $node A node which has an ancestor p-wrapper
 * @param int $sourceStart The source position passed to the serializer
 */
private function disablePWrapper( SerializerNode $node, $sourceStart ) {
	$serializer = $this->serializer;
	$wrapper = $node->snData->ancestorPNode;
	$wrapBase = $serializer->getParentNode( $wrapper );
	if ( $serializer->getLastChild( $wrapBase ) !== $wrapper ) {
		// Fostering or something? Abort!
		return;
	}

	// Climb from $node to the direct child of the p-wrapper, detaching
	// every node on the way from the p-wrapper
	$cursor = $node;
	do {
		$victim = $cursor;
		$victim->snData->ancestorPNode = null;
		$cursor = $serializer->getParentNode( $victim );
	} while ( $cursor !== $wrapper );

	// insertElement() needs an Element, so build a stand-in for the victim
	$standIn = new Element( $victim->namespace, $victim->name, $victim->attrs );
	$standIn->userData = $victim;

	// Move the victim under the parent of the p-wrapper
	$serializer->insertElement(
		TreeBuilder::UNDER, $wrapBase, $standIn, false, $sourceStart, 0
	);

	// The p-wrapper has lost a non-blank child
	$wrapper->snData->nonblankNodeCount--;

	// Cancel the diversion so that no more elements are inserted under this p-wrap
	$wrapBase->snData->childPElement = null;
}
450
/**
 * End an element. If the element still has an open child p-wrapper, that
 * is ended first. The munger data and the serializer node are detached
 * from the element afterwards.
 *
 * @param Element $element
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function endTag( Element $element, $sourceStart, $sourceLength ) {
	$openPWrap = $element->userData->snData->childPElement;
	if ( $openPWrap ) {
		// Close the p-wrapper at the same position, with zero source length
		$this->endTag( $openPWrap, $sourceStart, 0 );
	}

	$this->serializer->endTag( $element, $sourceStart, $sourceLength );

	$element->userData->snData = null;
	$element->userData = null;
}
460
/**
 * Pass a doctype through to the serializer unchanged.
 *
 * @param string $name
 * @param string $public
 * @param string $system
 * @param int $quirks
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	$this->serializer->doctype(
		$name, $public, $system, $quirks, $sourceStart, $sourceLength
	);
}
465
/**
 * Insert a comment. The reference node is resolved with
 * getParentForInsert(), so clone links and an open p-wrapper are
 * followed in the same way as for elements and text.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|SerializerNode|null $refElement
 * @param string $text The text of the comment
 * @param int $sourceStart Passed through to the serializer
 * @param int $sourceLength Passed through to the serializer
 */
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
	// Only the reference node is needed here, the resolved parent is not
	list( , $refNode ) = $this->getParentForInsert( $preposition, $refElement );
	$this->serializer->comment( $preposition, $refNode, $text,
		$sourceStart, $sourceLength );
}
471
/**
 * Pass a parse error through to the serializer unchanged.
 *
 * @param string $text
 * @param int $pos
 */
public function error( $text, $pos ) {
	$this->serializer->error(
		$text,
		$pos
	);
}
475
/**
 * Pass an attribute merge through to the serializer unchanged.
 *
 * @param Element $element
 * @param Attributes $attrs
 * @param int $sourceStart
 */
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
	$this->serializer->mergeAttributes(
		$element,
		$attrs,
		$sourceStart
	);
}
479
/**
 * Pass a node removal through to the serializer unchanged.
 *
 * @param Element $element
 * @param int $sourceStart
 */
public function removeNode( Element $element, $sourceStart ) {
	$this->serializer->removeNode(
		$element,
		$sourceStart
	);
}
483
/**
 * Insert $newParent under $element and move all the existing children of
 * $element under $newParent.
 *
 * If $element has an open child p-wrapper, the operation is applied to
 * the p-wrapper instead of $element.
 *
 * @param Element $element The element whose children are moved
 * @param Element $newParent The element which adopts the children
 * @param int $sourceStart The source position
 */
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
	$node = $element->userData;
	$openPWrap = $node->snData->childPElement;
	if ( $openPWrap ) {
		// Reparent under the p-wrapper instead, so that e.g.
		//   <blockquote><mw:p-wrap>...</mw:p-wrap></blockquote>
		// becomes
		//   <blockquote><mw:p-wrap><i>...</i></mw:p-wrap></blockquote>

		// The formatting element should not be the parent of the p-wrap.
		// Without this special case, the insertElement() of the <i> below
		// would be diverted into the p-wrapper, causing infinite recursion
		// (T178632)
		$this->reparentChildren( $openPWrap, $newParent, $sourceStart );
		return;
	}

	// Detach the children before inserting the new parent, so that the
	// new parent becomes the only child of $element
	$adopted = $node->children;
	$node->children = [];
	$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );

	$adopter = $newParent->userData;
	$adopterId = $adopter->id;
	foreach ( $adopted as $child ) {
		if ( !is_object( $child ) ) {
			// Non-object children have no parent link to update
			continue;
		}
		$this->trace( "reparent <{$child->name}>" );
		$child->parentId = $adopterId;
	}
	$adopter->children = $adopted;
}
513 }