Merge "Test ApiUserrights"
[lhc/web/wiklou.git] / includes / tidy / RemexCompatMunger.php
1 <?php
2
3 namespace MediaWiki\Tidy;
4
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\Serializer;
7 use RemexHtml\Serializer\SerializerNode;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10 use RemexHtml\TreeBuilder\TreeBuilder;
11 use RemexHtml\TreeBuilder\TreeHandler;
12 use RemexHtml\TreeBuilder\Element;
13
/**
 * Tree handler which sits between the Remex tree builder and a Serializer.
 * It forwards tree construction events to the serializer, and while doing so
 * inserts <mw:p-wrap> elements around inline content and splits stacks of
 * formatting elements where block and inline content are mixed.
 *
 * Per-node state is kept in a RemexMungerData object stored in the snData
 * property of each SerializerNode.
 *
 * @internal
 */
class RemexCompatMunger implements TreeHandler {
	/**
	 * Names of elements which are treated as inline by insertElement(). Any
	 * element which is not listed here is treated as a block.
	 */
	private static $onlyInlineElements = [
		"a" => true,
		"abbr" => true,
		"acronym" => true,
		"applet" => true,
		"b" => true,
		"basefont" => true,
		"bdo" => true,
		"big" => true,
		"br" => true,
		"button" => true,
		"cite" => true,
		"code" => true,
		"dfn" => true,
		"em" => true,
		"font" => true,
		"i" => true,
		"iframe" => true,
		"img" => true,
		"input" => true,
		"kbd" => true,
		"label" => true,
		"legend" => true,
		"map" => true,
		"object" => true,
		"param" => true,
		"q" => true,
		"rb" => true,
		"rbc" => true,
		"rp" => true,
		"rt" => true,
		"rtc" => true,
		"ruby" => true,
		"s" => true,
		"samp" => true,
		"select" => true,
		"small" => true,
		"span" => true,
		"strike" => true,
		"strong" => true,
		"sub" => true,
		"sup" => true,
		"textarea" => true,
		"tt" => true,
		"u" => true,
		"var" => true,
		// Those defined in tidy.conf
		"video" => true,
		"audio" => true,
		"bdi" => true,
		"data" => true,
		"time" => true,
		"mark" => true,
	];

	/**
	 * Names of formatting elements. Such an element is flagged as splittable
	 * when it is inserted under a p-wrapper or under another splittable
	 * element.
	 */
	private static $formattingElements = [
		'a' => true,
		'b' => true,
		'big' => true,
		'code' => true,
		'em' => true,
		'font' => true,
		'i' => true,
		'nobr' => true,
		's' => true,
		'small' => true,
		'strike' => true,
		'strong' => true,
		'tt' => true,
		'u' => true,
	];

	/**
	 * The downstream serializer which receives the munged events.
	 *
	 * @var Serializer
	 */
	private $serializer;

	/**
	 * @param Serializer $serializer
	 */
	public function __construct( Serializer $serializer ) {
		$this->serializer = $serializer;
	}

	/**
	 * Start the document downstream and mark the root node as needing
	 * p-wrapping.
	 *
	 * @param string|null $fragmentNamespace Passed through to the serializer
	 * @param string|null $fragmentName Passed through to the serializer
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		$this->serializer->startDocument( $fragmentNamespace, $fragmentName );
		$root = $this->serializer->getRootNode();
		$root->snData = new RemexMungerData;
		$root->snData->needsPWrapping = true;
	}

	/**
	 * @param int $pos Passed through to the serializer
	 */
	public function endDocument( $pos ) {
		$this->serializer->endDocument( $pos );
	}

	/**
	 * Work out where an insertion should actually be made.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The element the preposition refers to
	 * @return array A two-element array: the effective parent node, and the
	 *   reference node to pass to the serializer (null for TreeBuilder::ROOT).
	 */
	private function getParentForInsert( $preposition, $refElement ) {
		if ( $preposition === TreeBuilder::ROOT ) {
			return [ $this->serializer->getRootNode(), null ];
		} elseif ( $preposition === TreeBuilder::BEFORE ) {
			$refNode = $refElement->userData;
			return [ $this->serializer->getParentNode( $refNode ), $refNode ];
		} else {
			$refNode = $refElement->userData;
			$refData = $refNode->snData;
			if ( $refData->currentCloneElement ) {
				// Follow a chain of clone links if necessary
				$origRefData = $refData;
				while ( $refData->currentCloneElement ) {
					$refElement = $refData->currentCloneElement;
					$refNode = $refElement->userData;
					$refData = $refNode->snData;
				}
				// Cache the end of the chain in the requested element
				$origRefData->currentCloneElement = $refElement;
			} elseif ( $refData->childPElement ) {
				// Divert the insertion into the currently open p-wrapper
				$refElement = $refData->childPElement;
				$refNode = $refElement->userData;
			}
			return [ $refNode, $refNode ];
		}
	}

	/**
	 * Insert a p-wrapper
	 *
	 * The new <mw:p-wrap> element is inserted under $parent, and is recorded
	 * as $parent's childPElement so that later insertions are diverted into it.
	 *
	 * @param SerializerNode $parent
	 * @param int $sourceStart
	 * @return SerializerNode
	 */
	private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
		$pWrap = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
		$this->serializer->insertElement( TreeBuilder::UNDER, $parent, $pWrap, false,
			$sourceStart, 0 );
		$data = new RemexMungerData;
		$data->isPWrapper = true;
		$data->wrapBaseNode = $parent;
		$pWrap->userData->snData = $data;
		$parent->snData->childPElement = $pWrap;
		return $pWrap->userData;
	}

	/**
	 * Insert a text node, creating a p-wrapper or splitting the tag stack
	 * first if required.
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param string $text The string containing the text to insert
	 * @param int $start The offset of the text within $text
	 * @param int $length The length of the text within $text
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function characters( $preposition, $refElement, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		// The text is blank if it consists only of tab, LF, FF, CR and space
		$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;

		list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;

		if ( $preposition === TreeBuilder::UNDER ) {
			if ( $parentData->needsPWrapping && !$isBlank ) {
				// Add a p-wrapper for bare text under body/blockquote
				$refNode = $this->insertPWrapper( $refNode, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
				// The parent is splittable and in block mode, so split the tag stack
				$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			}
		}

		if ( !$isBlank ) {
			// Non-whitespace characters detected
			$parentData->nonblankNodeCount++;
		}
		$this->serializer->characters( $preposition, $refNode, $text, $start,
			$length, $sourceStart, $sourceLength );
	}

	/**
	 * Debugging hook called at each decision point. It does nothing unless
	 * the echo statement below is uncommented.
	 *
	 * @param string $msg
	 */
	private function trace( $msg ) {
		// echo "[RCM] $msg\n";
	}

	/**
	 * Insert or reparent an element. Create p-wrappers or split the tag stack
	 * as necessary.
	 *
	 * Consider the following insertion locations. The parent may be:
	 *
	 *   - A: A body or blockquote (!!needsPWrapping)
	 *   - B: A p-wrapper (!!isPWrapper)
	 *   - C: A descendant of a p-wrapper (!!ancestorPNode)
	 *     - CS: With splittable formatting elements in the stack region up to
	 *       the p-wrapper
	 *     - CU: With one or more unsplittable elements in the stack region up
	 *       to the p-wrapper
	 *   - D: Not a descendant of a p-wrapper (!ancestorPNode)
	 *     - DS: With splittable formatting elements in the stack region up to
	 *       the body or blockquote
	 *     - DU: With one or more unsplittable elements in the stack region up
	 *       to the body or blockquote
	 *
	 * And consider that we may insert two types of element:
	 *   - b: block
	 *   - i: inline
	 *
	 * We handle the insertion as follows:
	 *
	 *   - A/i: Create a p-wrapper, insert under it
	 *   - A/b: Insert as normal
	 *   - B/i: Insert as normal
	 *   - B/b: Close the p-wrapper, insert under the body/blockquote (wrap
	 *     base) instead
	 *   - C/i: Insert as normal
	 *   - CS/b: Split the tag stack, insert the block under cloned formatting
	 *     elements which have the wrap base (the parent of the p-wrap) as
	 *     their ultimate parent.
	 *   - CU/b: Disable the p-wrap, by reparenting the currently open child
	 *     of the p-wrap under the p-wrap's parent. Then insert the block as
	 *     normal.
	 *   - D/b: Insert as normal
	 *   - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
	 *     parent of the formatting elements thus cloned. The parent of the
	 *     p-wrapper is the body or blockquote.
	 *   - DU/i: Insert as normal
	 *
	 * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
	 * normal, the full algorithm is not followed.
	 *
	 * @param int $preposition
	 * @param Element|SerializerNode|null $refElement
	 * @param Element $element
	 * @param bool $void
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function insertElement( $preposition, $refElement, Element $element, $void,
		$sourceStart, $sourceLength
	) {
		list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;
		$elementName = $element->htmlName;

		$inline = isset( self::$onlyInlineElements[$elementName] );
		$under = $preposition === TreeBuilder::UNDER;

		if ( $under && $parentData->isPWrapper && !$inline ) {
			// [B/b] The element is non-inline and the parent is a p-wrapper,
			// close the parent and insert into its parent instead
			$this->trace( 'insert B/b' );
			$newParent = $this->serializer->getParentNode( $parent );
			$parent = $newParent;
			$parentData = $parent->snData;
			$pElement = $parentData->childPElement;
			// Cancel the diversion before closing the p-wrapper, so that
			// endTag() only closes the p-wrapper itself
			$parentData->childPElement = null;
			$newRef = $refElement->userData;
			$this->endTag( $pElement, $sourceStart, 0 );
		} elseif ( $under && $parentData->isSplittable
			&& (bool)$parentData->ancestorPNode !== $inline
		) {
			// [CS/b, DS/i] The parent is splittable and the current element is
			// inline in block context, or if the current element is a block
			// under a p-wrapper, split the tag stack.
			$this->trace( $inline ? 'insert DS/i' : 'insert CS/b' );
			$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $under && $parentData->needsPWrapping && $inline ) {
			// [A/i] If the element is inline and we are in body/blockquote,
			// we need to create a p-wrapper
			$this->trace( 'insert A/i' );
			$newRef = $this->insertPWrapper( $newRef, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $parentData->ancestorPNode && !$inline ) {
			// [CU/b] If the element is non-inline and (despite attempting to
			// split above) there is still an ancestor p-wrap, disable that
			// p-wrap
			$this->trace( 'insert CU/b' );
			$this->disablePWrapper( $parent, $sourceStart );
		} else {
			// [A/b, B/i, C/i, D/b, DU/i] insert as normal
			$this->trace( 'insert normal' );
		}

		// An element with element children is a non-blank element
		$parentData->nonblankNodeCount++;

		// Insert the element downstream and so initialise its userData
		$this->serializer->insertElement( $preposition, $newRef,
			$element, $void, $sourceStart, $sourceLength );

		// Initialise snData
		if ( !$element->userData->snData ) {
			$elementData = $element->userData->snData = new RemexMungerData;
		} else {
			$elementData = $element->userData->snData;
		}
		if ( ( $parentData->isPWrapper || $parentData->isSplittable )
			&& isset( self::$formattingElements[$elementName] )
		) {
			$elementData->isSplittable = true;
		}
		if ( $parentData->isPWrapper ) {
			$elementData->ancestorPNode = $parent;
		} elseif ( $parentData->ancestorPNode ) {
			$elementData->ancestorPNode = $parentData->ancestorPNode;
		}
		if ( $parentData->wrapBaseNode ) {
			$elementData->wrapBaseNode = $parentData->wrapBaseNode;
		} elseif ( $parentData->needsPWrapping ) {
			$elementData->wrapBaseNode = $parent;
		}
		if ( $elementName === 'body'
			|| $elementName === 'blockquote'
			|| $elementName === 'html'
		) {
			$elementData->needsPWrapping = true;
		}
	}

	/**
	 * Clone nodes in a stack range and return the new parent
	 *
	 * The range runs from $parentNode up to, but not including, its ancestor
	 * p-wrapper if it has one, or otherwise its wrap base. Nodes in the range
	 * which have no non-blank content are removed once they have been cloned.
	 *
	 * @param SerializerNode $parentNode
	 * @param bool $inline If true, the clones are placed under a new
	 *   p-wrapper; if false, they are placed directly under the wrap base.
	 * @param int $pos The source position
	 * @return SerializerNode
	 * @throws \Exception If the root node is reached before the end of the
	 *   clone range is found
	 */
	private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
		$parentData = $parentNode->snData;
		$wrapBase = $parentData->wrapBaseNode;
		$pWrap = $parentData->ancestorPNode;
		$cloneEnd = $pWrap ?: $wrapBase;

		$serializer = $this->serializer;
		$node = $parentNode;
		$root = $serializer->getRootNode();
		$nodes = [];
		$removableNodes = [];
		while ( $node !== $cloneEnd ) {
			$nextParent = $serializer->getParentNode( $node );
			if ( $nextParent === $root ) {
				throw new \Exception( 'Did not find end of clone range' );
			}
			$nodes[] = $node;
			if ( $node->snData->nonblankNodeCount === 0 ) {
				// This node has no non-blank content, so it will be removed
				// below, and its parent has one less non-blank child
				$removableNodes[] = $node;
				$nextParent->snData->nonblankNodeCount--;
			}
			$node = $nextParent;
		}

		if ( $inline ) {
			$pWrap = $this->insertPWrapper( $wrapBase, $pos );
			$node = $pWrap;
		} else {
			if ( $pWrap ) {
				// End the p-wrap which was open, cancel the diversion
				$wrapBase->snData->childPElement = null;
			}
			$pWrap = null;
			$node = $wrapBase;
		}

		// $nodes was collected innermost first, so iterate in reverse to
		// recreate the stack from the outermost element inwards
		for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
			$oldNode = $nodes[$i];
			$oldData = $oldNode->snData;
			$nodeParent = $node;
			$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
			$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
				$element, false, $pos, 0 );
			// Redirect future insertions under the old node to its clone
			$oldData->currentCloneElement = $element;

			$newNode = $element->userData;
			$newData = $newNode->snData = new RemexMungerData;
			if ( $pWrap ) {
				$newData->ancestorPNode = $pWrap;
			}
			$newData->isSplittable = true;
			$newData->wrapBaseNode = $wrapBase;
			$newData->isPWrapper = $oldData->isPWrapper;

			$nodeParent->snData->nonblankNodeCount++;

			$node = $newNode;
		}
		foreach ( $removableNodes as $rNode ) {
			// Make a fake Element, since removeNode() takes an Element
			$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
			$fakeElement->userData = $rNode;
			$this->serializer->removeNode( $fakeElement, $pos );
		}
		return $node;
	}

	/**
	 * Find the ancestor of $node which is a child of a p-wrapper, and
	 * reparent that node so that it is placed after the end of the p-wrapper
	 *
	 * Nothing is done if the p-wrapper is not the last child of its parent.
	 *
	 * @param SerializerNode $node
	 * @param int $sourceStart
	 */
	private function disablePWrapper( SerializerNode $node, $sourceStart ) {
		$nodeData = $node->snData;
		$pWrapNode = $nodeData->ancestorPNode;
		$newParent = $this->serializer->getParentNode( $pWrapNode );
		if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
			// Fostering or something? Abort!
			return;
		}

		// Walk up from $node to the child of the p-wrapper, clearing the
		// ancestorPNode link of each node on the way
		$nextParent = $node;
		do {
			$victim = $nextParent;
			$victim->snData->ancestorPNode = null;
			$nextParent = $this->serializer->getParentNode( $victim );
		} while ( $nextParent !== $pWrapNode );

		// Make a fake Element to use in a reparenting operation
		$victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
		$victimElement->userData = $victim;

		// Reparent
		$this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
			false, $sourceStart, 0 );

		// Decrement nonblank node count
		$pWrapNode->snData->nonblankNodeCount--;

		// Cancel the diversion so that no more elements are inserted under this p-wrap
		$newParent->snData->childPElement = null;
	}

	/**
	 * End an element, first ending any p-wrapper which is open under it, and
	 * then release the element's serializer node and munger data.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function endTag( Element $element, $sourceStart, $sourceLength ) {
		$data = $element->userData->snData;
		if ( $data->childPElement ) {
			$this->endTag( $data->childPElement, $sourceStart, 0 );
		}
		$this->serializer->endTag( $element, $sourceStart, $sourceLength );
		$element->userData->snData = null;
		$element->userData = null;
	}

	/**
	 * Pass a doctype through to the serializer unchanged.
	 *
	 * @param string $name
	 * @param string $public
	 * @param string $system
	 * @param int $quirks
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		$this->serializer->doctype( $name, $public, $system, $quirks,
			$sourceStart, $sourceLength );
	}

	/**
	 * Insert a comment at the location chosen by getParentForInsert(). No
	 * p-wrapper is created and the tag stack is not split.
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param string $text
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
		list( , $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$this->serializer->comment( $preposition, $refNode, $text,
			$sourceStart, $sourceLength );
	}

	/**
	 * Pass an error through to the serializer unchanged.
	 *
	 * @param string $text
	 * @param int $pos
	 */
	public function error( $text, $pos ) {
		$this->serializer->error( $text, $pos );
	}

	/**
	 * Pass an attribute merge through to the serializer unchanged.
	 *
	 * @param Element $element
	 * @param Attributes $attrs
	 * @param int $sourceStart
	 */
	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
		$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
	}

	/**
	 * Pass a node removal through to the serializer unchanged.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 */
	public function removeNode( Element $element, $sourceStart ) {
		$this->serializer->removeNode( $element, $sourceStart );
	}

	/**
	 * Insert $newParent under $element and move the existing children of
	 * $element so that they become children of $newParent.
	 *
	 * @param Element $element
	 * @param Element $newParent
	 * @param int $sourceStart
	 */
	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
		$self = $element->userData;
		if ( $self->snData->childPElement ) {
			// Reparent under the p-wrapper instead, so that e.g.
			//   <blockquote><mw:p-wrap>...</mw:p-wrap></blockquote>
			// becomes
			//   <blockquote><mw:p-wrap><i>...</i></mw:p-wrap></blockquote>

			// The formatting element should not be the parent of the p-wrap.
			// Without this special case, the insertElement() of the <i> below
			// would be diverted into the p-wrapper, causing infinite recursion
			// (T178632)
			$this->reparentChildren( $self->snData->childPElement, $newParent, $sourceStart );
			return;
		}

		// Detach the children first, so that the new parent is inserted
		// under an empty node
		$children = $self->children;
		$self->children = [];
		$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
		$newParentNode = $newParent->userData;
		$newParentId = $newParentNode->id;
		foreach ( $children as $child ) {
			// Only object children carry a parent link to update
			if ( is_object( $child ) ) {
				$this->trace( "reparent <{$child->name}>" );
				$child->parentId = $newParentId;
			}
		}
		$newParentNode->children = $children;
	}
}