Merge "Add support for PHP7 random_bytes in favor of mcrypt_create_iv"
[lhc/web/wiklou.git] / includes / tidy / RemexCompatMunger.php
1 <?php
2
3 namespace MediaWiki\Tidy;
4
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\Serializer;
7 use RemexHtml\Serializer\SerializerNode;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10 use RemexHtml\TreeBuilder\TreeBuilder;
11 use RemexHtml\TreeBuilder\TreeHandler;
12 use RemexHtml\TreeBuilder\Element;
13
14 /**
15 * @internal
16 */
17 class RemexCompatMunger implements TreeHandler {
/**
 * Names of the elements which are treated as inline. insertElement()
 * looks the HTML name of each inserted element up in this map; anything
 * not listed here is handled as a non-inline (block) element.
 *
 * The names are stored as keys so that membership can be tested with isset().
 */
private static $onlyInlineElements = [
	'a' => true,
	'abbr' => true,
	'acronym' => true,
	'applet' => true,
	'b' => true,
	'basefont' => true,
	'bdo' => true,
	'big' => true,
	'br' => true,
	'button' => true,
	'cite' => true,
	'code' => true,
	'dfn' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'iframe' => true,
	'img' => true,
	'input' => true,
	'kbd' => true,
	'label' => true,
	'legend' => true,
	'map' => true,
	'object' => true,
	'param' => true,
	'q' => true,
	'rb' => true,
	'rbc' => true,
	'rp' => true,
	'rt' => true,
	'rtc' => true,
	'ruby' => true,
	's' => true,
	'samp' => true,
	'select' => true,
	'small' => true,
	'span' => true,
	'strike' => true,
	'strong' => true,
	'sub' => true,
	'sup' => true,
	'textarea' => true,
	'tt' => true,
	'u' => true,
	'var' => true,
];
65
/**
 * Names of the formatting elements. insertElement() flags an element as
 * splittable when its name is listed here and its parent is either a
 * p-wrapper or itself splittable.
 *
 * The names are stored as keys so that membership can be tested with isset().
 */
private static $formattingElements = [
	'a' => true,
	'b' => true,
	'big' => true,
	'code' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'nobr' => true,
	's' => true,
	'small' => true,
	'strike' => true,
	'strong' => true,
	'tt' => true,
	'u' => true,
];
82
/**
 * The serializer to which all tree events are forwarded after munging.
 *
 * Declared explicitly so that the constructor does not create a dynamic
 * property (deprecated as of PHP 8.2).
 *
 * @var Serializer
 */
private $serializer;

/**
 * Constructor
 *
 * @param Serializer $serializer The serializer which receives the
 *   (possibly modified) tree construction events
 */
public function __construct( Serializer $serializer ) {
	$this->serializer = $serializer;
}
91
/**
 * Start the document in the serializer and flag the root node as a
 * context in which inline content needs a p-wrapper.
 *
 * @param string|null $fragmentNamespace Passed through to the serializer
 * @param string|null $fragmentName Passed through to the serializer
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
	$this->serializer->startDocument( $fragmentNamespace, $fragmentName );

	$rootData = new RemexMungerData;
	$rootData->needsPWrapping = true;
	$this->serializer->getRootNode()->snData = $rootData;
}
98
/**
 * Forward the end-of-document event to the serializer unchanged.
 *
 * @param int $pos The source position
 */
public function endDocument( $pos ) {
	$this->serializer->endDocument( $pos );
}
102
/**
 * Work out which serializer node an insertion should really go to.
 *
 * @param int $preposition One of the TreeBuilder::ROOT, BEFORE or UNDER constants
 * @param Element|null $refElement The reference element supplied by the
 *   tree builder; not read when $preposition is ROOT
 * @return array A two-element list: the parent node of the insertion,
 *   followed by the reference node to pass to the serializer (null for ROOT)
 */
private function getParentForInsert( $preposition, $refElement ) {
	if ( $preposition === TreeBuilder::ROOT ) {
		return [ $this->serializer->getRootNode(), null ];
	}

	$target = $refElement->userData;
	if ( $preposition === TreeBuilder::BEFORE ) {
		// Inserting before a sibling: the parent is that sibling's parent
		return [ $this->serializer->getParentNode( $target ), $target ];
	}

	// Any other preposition is treated as an insertion under the reference
	// element, which may have been diverted to a clone or to a p-wrapper
	$requestedData = $target->snData;
	if ( $requestedData->currentCloneElement ) {
		// Walk to the end of the chain of clone links
		$tail = $requestedData->currentCloneElement;
		while ( $tail->userData->snData->currentCloneElement ) {
			$tail = $tail->userData->snData->currentCloneElement;
		}
		// Remember the end of the chain on the element that was asked for,
		// so that the next lookup is a single hop
		$requestedData->currentCloneElement = $tail;
		$target = $tail->userData;
	} elseif ( $requestedData->childPElement ) {
		// There is an open p-wrapper under this element, insert into it
		$target = $requestedData->childPElement->userData;
	}

	return [ $target, $target ];
}
129
/**
 * Create an mw:p-wrap element under the given node and register it as
 * the node's currently open p-wrapper.
 *
 * @param SerializerNode $parent The node which will contain the p-wrapper;
 *   it is recorded as the wrap base of the new wrapper
 * @param int $sourceStart The source position
 * @return SerializerNode The serializer node of the new p-wrapper
 */
private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
	$wrapperElement = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
	// Inserting into the serializer is what populates $wrapperElement->userData
	$this->serializer->insertElement(
		TreeBuilder::UNDER, $parent, $wrapperElement, false, $sourceStart, 0
	);
	$wrapperNode = $wrapperElement->userData;

	$wrapperData = new RemexMungerData;
	$wrapperData->isPWrapper = true;
	$wrapperData->wrapBaseNode = $parent;
	$wrapperNode->snData = $wrapperData;

	// Divert subsequent insertions under $parent into the wrapper
	$parent->snData->childPElement = $wrapperElement;

	return $wrapperNode;
}
148
/**
 * Insert a text node, creating a p-wrapper or splitting the tag stack
 * first when the text is being appended under a node that requires it.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|null $refElement The reference element
 * @param string $text The string which contains the characters
 * @param int $start Offset of the characters within $text
 * @param int $length Length of the characters within $text
 * @param int $sourceStart The source position
 * @param int $sourceLength The source length
 */
public function characters( $preposition, $refElement, $text, $start, $length,
	$sourceStart, $sourceLength
) {
	// The text has content if anything other than HTML whitespace is in range
	$hasContent = strspn( $text, "\t\n\f\r ", $start, $length ) !== $length;

	list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;

	if ( $preposition === TreeBuilder::UNDER ) {
		$divertedNode = null;
		if ( $parentData->needsPWrapping && $hasContent ) {
			// Bare non-blank text under body/blockquote gets a p-wrapper
			$divertedNode = $this->insertPWrapper( $refNode, $sourceStart );
		} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
			// Splittable parent in block mode: split the tag stack
			$divertedNode = $this->splitTagStack( $refNode, true, $sourceStart );
		}
		if ( $divertedNode !== null ) {
			// The text now goes under the newly created node
			$refNode = $divertedNode;
			$parentData = $divertedNode->snData;
		}
	}

	if ( $hasContent ) {
		// Count the text as non-blank content of its (final) parent
		$parentData->nonblankNodeCount++;
	}

	$this->serializer->characters( $preposition, $refNode, $text, $start,
		$length, $sourceStart, $sourceLength );
}
178
/**
 * Insert or reparent an element. Create p-wrappers or split the tag stack
 * as necessary.
 *
 * Consider the following insertion locations. The parent may be:
 *
 *   - A: A body or blockquote (!!needsPWrapping)
 *   - B: A p-wrapper (!!isPWrapper)
 *   - C: A descendant of a p-wrapper (!!ancestorPNode)
 *     - CS: With splittable formatting elements in the stack region up to
 *       the p-wrapper
 *     - CU: With one or more unsplittable elements in the stack region up
 *       to the p-wrapper
 *   - D: Not a descendant of a p-wrapper (!ancestorPNode)
 *     - DS: With splittable formatting elements in the stack region up to
 *       the body or blockquote
 *     - DU: With one or more unsplittable elements in the stack region up
 *       to the body or blockquote
 *
 * And consider that we may insert two types of element:
 *   - b: block
 *   - i: inline
 *
 * We handle the insertion as follows:
 *
 *   - A/i: Create a p-wrapper, insert under it
 *   - A/b: Insert as normal
 *   - B/i: Insert as normal
 *   - B/b: Close the p-wrapper, insert under the body/blockquote (wrap
 *     base) instead
 *   - C/i: Insert as normal
 *   - CS/b: Split the tag stack, insert the block under cloned formatting
 *     elements which have the wrap base (the parent of the p-wrap) as
 *     their ultimate parent.
 *   - CU/b: Disable the p-wrap, by reparenting the currently open child
 *     of the p-wrap under the p-wrap's parent. Then insert the block as
 *     normal.
 *   - D/b: Insert as normal
 *   - DS/i: Split the tag stack, creating a new p-wrapper as the ultimate
 *     parent of the formatting elements thus cloned. The parent of the
 *     p-wrapper is the body or blockquote.
 *   - DU/i: Insert as normal
 *
 * FIXME: fostering ($preposition == BEFORE) is mostly done by inserting as
 * normal, the full algorithm is not followed.
 *
 * @param int $preposition
 * @param Element|SerializerNode|null $refElement
 * @param Element $element
 * @param bool $void
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function insertElement( $preposition, $refElement, Element $element, $void,
	$sourceStart, $sourceLength
) {
	list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;
	$elementName = $element->htmlName;

	// Anything which is not in the inline list is handled as a block
	$inline = isset( self::$onlyInlineElements[$elementName] );
	$under = $preposition === TreeBuilder::UNDER;

	if ( $under && $parentData->isPWrapper && !$inline ) {
		// [B/b] The element is non-inline and the parent is a p-wrapper,
		// close the parent and insert into its parent instead
		$newParent = $this->serializer->getParentNode( $parent );
		$parent = $newParent;
		$parentData = $parent->snData;
		$parentData->childPElement = null;
		$newRef = $refElement->userData;
		// FIXME cannot call endTag() since we don't have an Element
	} elseif ( $under && $parentData->isSplittable
		&& (bool)$parentData->ancestorPNode !== $inline
	) {
		// [CS/b, DS/i] The parent is splittable and the current element is
		// inline in block context, or if the current element is a block
		// under a p-wrapper, split the tag stack.
		$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $under && $parentData->needsPWrapping && $inline ) {
		// [A/i] If the element is inline and we are in body/blockquote,
		// we need to create a p-wrapper
		$newRef = $this->insertPWrapper( $newRef, $sourceStart );
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $parentData->ancestorPNode && !$inline ) {
		// [CU/b] If the element is non-inline and (despite attempting to
		// split above) there is still an ancestor p-wrap, disable that
		// p-wrap
		$this->disablePWrapper( $parent, $sourceStart );
	}
	// else [A/b, B/i, C/i, D/b, DU/i] insert as normal

	// An element with element children is a non-blank element
	$parentData->nonblankNodeCount++;

	// Insert the element downstream and so initialise its userData
	$this->serializer->insertElement( $preposition, $newRef,
		$element, $void, $sourceStart, $sourceLength );

	// Initialise snData, reusing the existing data if the node already has some
	if ( !$element->userData->snData ) {
		$elementData = $element->userData->snData = new RemexMungerData;
	} else {
		$elementData = $element->userData->snData;
	}
	// A formatting element directly under a p-wrapper or under another
	// splittable element can itself be split
	if ( ( $parentData->isPWrapper || $parentData->isSplittable )
		&& isset( self::$formattingElements[$elementName] )
	) {
		$elementData->isSplittable = true;
	}
	// Propagate the nearest enclosing p-wrapper, if any
	if ( $parentData->isPWrapper ) {
		$elementData->ancestorPNode = $parent;
	} elseif ( $parentData->ancestorPNode ) {
		$elementData->ancestorPNode = $parentData->ancestorPNode;
	}
	// Propagate the wrap base, i.e. the node under which p-wrappers are created
	if ( $parentData->wrapBaseNode ) {
		$elementData->wrapBaseNode = $parentData->wrapBaseNode;
	} elseif ( $parentData->needsPWrapping ) {
		$elementData->wrapBaseNode = $parent;
	}
	// Inline content directly under these elements needs a p-wrapper
	if ( $elementName === 'body'
		|| $elementName === 'blockquote'
		|| $elementName === 'html'
	) {
		$elementData->needsPWrapping = true;
	}
}
312
/**
 * Clone nodes in a stack range and return the new parent
 *
 * The range runs from $parentNode up to (but not including) the enclosing
 * p-wrapper if there is one, or otherwise the wrap base. The clones are
 * created under a new p-wrapper when $inline is true, or directly under
 * the wrap base when it is false. Each original node is linked to its
 * clone via currentCloneElement, and originals which have no non-blank
 * content are removed from the serializer.
 *
 * @param SerializerNode $parentNode The innermost node of the range
 * @param bool $inline Whether the content about to be inserted is inline
 * @param int $pos The source position
 * @return SerializerNode The innermost clone, or if the range was empty,
 *   the new p-wrapper ($inline) or the wrap base (!$inline)
 * @throws \Exception If the walk up the stack reaches a child of the root
 *   node without finding the end of the clone range
 */
private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
	$parentData = $parentNode->snData;
	$wrapBase = $parentData->wrapBaseNode;
	$pWrap = $parentData->ancestorPNode;
	if ( !$pWrap ) {
		$cloneEnd = $wrapBase;
	} else {
		$cloneEnd = $pWrap;
	}

	$serializer = $this->serializer;
	$node = $parentNode;
	$root = $serializer->getRootNode();
	$nodes = [];
	$removableNodes = [];
	// Collect the nodes to clone, innermost first
	while ( $node !== $cloneEnd ) {
		$nextParent = $serializer->getParentNode( $node );
		if ( $nextParent === $root ) {
			throw new \Exception( 'Did not find end of clone range' );
		}
		$nodes[] = $node;
		if ( $node->snData->nonblankNodeCount === 0 ) {
			// The original has no content, so it will be removed below and
			// should no longer count as content of its parent
			$removableNodes[] = $node;
			$nextParent->snData->nonblankNodeCount--;
		}
		$node = $nextParent;
	}

	if ( $inline ) {
		$pWrap = $this->insertPWrapper( $wrapBase, $pos );
		$node = $pWrap;
	} else {
		if ( $pWrap ) {
			// End the p-wrap which was open, cancel the diversion
			$wrapBase->snData->childPElement = null;
		}
		$pWrap = null;
		$node = $wrapBase;
	}

	// Recreate the stack under the new parent, outermost first
	for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
		$oldNode = $nodes[$i];
		$oldData = $oldNode->snData;
		$nodeParent = $node;
		$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
		$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
			$element, false, $pos, 0 );
		// Divert future insertions under the original to its clone
		$oldData->currentCloneElement = $element;

		$newNode = $element->userData;
		$newData = $newNode->snData = new RemexMungerData;
		if ( $pWrap ) {
			$newData->ancestorPNode = $pWrap;
		}
		$newData->isSplittable = true;
		$newData->wrapBaseNode = $wrapBase;
		$newData->isPWrapper = $oldData->isPWrapper;

		$nodeParent->snData->nonblankNodeCount++;

		$node = $newNode;
	}
	foreach ( $removableNodes as $rNode ) {
		// removeNode() takes an Element, so make a fake one which points
		// at the existing serializer node
		$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
		$fakeElement->userData = $rNode;
		$this->serializer->removeNode( $fakeElement, $pos );
	}
	return $node;
}
391
/**
 * Find the ancestor of $node which is a child of a p-wrapper, and
 * reparent that node so that it is placed after the end of the p-wrapper.
 *
 * Nothing is done when the p-wrapper is not the last child of its parent.
 *
 * @param SerializerNode $node A node which has an ancestor p-wrapper
 * @param int $sourceStart The source position
 */
private function disablePWrapper( SerializerNode $node, $sourceStart ) {
	$wrapperNode = $node->snData->ancestorPNode;
	$wrapperParent = $this->serializer->getParentNode( $wrapperNode );
	if ( $this->serializer->getLastChild( $wrapperParent ) !== $wrapperNode ) {
		// Fostering or something? Abort!
		return;
	}

	// Climb from $node to the direct child of the p-wrapper, detaching
	// every node on the way from the p-wrapper
	$victim = $node;
	while ( true ) {
		$victim->snData->ancestorPNode = null;
		$above = $this->serializer->getParentNode( $victim );
		if ( $above === $wrapperNode ) {
			break;
		}
		$victim = $above;
	}

	// The serializer wants an Element, so wrap the existing node in a fake one
	$fakeElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
	$fakeElement->userData = $victim;

	// Move the victim out of the p-wrapper, under the p-wrapper's parent
	$this->serializer->insertElement(
		TreeBuilder::UNDER, $wrapperParent, $fakeElement, false, $sourceStart, 0
	);

	// The p-wrapper has lost one non-blank child
	$wrapperNode->snData->nonblankNodeCount--;

	// Cancel the diversion so that no more elements are inserted under this p-wrap
	$wrapperParent->snData->childPElement = null;
}
426
/**
 * Forward an end tag event to the serializer unchanged.
 *
 * @param Element $element The element being closed
 * @param int $sourceStart The source position
 * @param int $sourceLength The source length
 */
public function endTag( Element $element, $sourceStart, $sourceLength ) {
	$this->serializer->endTag(
		$element,
		$sourceStart,
		$sourceLength
	);
}
430
/**
 * Forward a doctype event to the serializer unchanged.
 *
 * @param string $name
 * @param string $public
 * @param string $system
 * @param int $quirks
 * @param int $sourceStart The source position
 * @param int $sourceLength The source length
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	$this->serializer->doctype(
		$name,
		$public,
		$system,
		$quirks,
		$sourceStart,
		$sourceLength
	);
}
435
/**
 * Insert a comment, redirecting it to the same node that an element or
 * text insertion at this location would be diverted to.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|null $refElement The reference element
 * @param string $text The comment text
 * @param int $sourceStart The source position
 * @param int $sourceLength The source length
 */
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
	// Only the reference node is needed here, the parent node is not used
	$target = $this->getParentForInsert( $preposition, $refElement )[1];
	$this->serializer->comment( $preposition, $target, $text,
		$sourceStart, $sourceLength );
}
441
/**
 * Forward a parse error to the serializer unchanged.
 *
 * @param string $text The error message
 * @param int $pos The source position
 */
public function error( $text, $pos ) {
	$this->serializer->error(
		$text,
		$pos
	);
}
445
/**
 * Forward an attribute merge request to the serializer unchanged.
 *
 * @param Element $element The element which receives the attributes
 * @param Attributes $attrs The attributes to merge
 * @param int $sourceStart The source position
 */
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
	$this->serializer->mergeAttributes(
		$element,
		$attrs,
		$sourceStart
	);
}
449
/**
 * Forward a node removal request to the serializer unchanged.
 *
 * @param Element $element The element to remove
 * @param int $sourceStart The source position
 */
public function removeNode( Element $element, $sourceStart ) {
	$this->serializer->removeNode(
		$element,
		$sourceStart
	);
}
453
/**
 * Move all children of $element to $newParent, and insert $newParent as
 * the only child of $element.
 *
 * The new parent is inserted through this class's own insertElement(),
 * so the usual p-wrapping and splitting rules apply to it.
 *
 * @param Element $element The element whose children are moved
 * @param Element $newParent The element which receives the children
 * @param int $sourceStart The source position
 */
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
	// Detach the child list before inserting, so that the new parent
	// goes into an empty element
	$oldNode = $element->userData;
	$movedChildren = $oldNode->children;
	$oldNode->children = [];

	$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );

	$adoptingNode = $newParent->userData;
	$adoptingId = $adoptingNode->id;
	foreach ( $movedChildren as $movedChild ) {
		// Only object children carry a parent ID; other entries are left untouched
		if ( !is_object( $movedChild ) ) {
			continue;
		}
		$movedChild->parentId = $adoptingId;
	}
	$adoptingNode->children = $movedChildren;
}
468 }