* Split off deletedrevision (view only) right and give it to sysops
[lhc/web/wiklou.git] / includes / diff / Nodes.php
1 <?php
2
3 /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * or see http://www.gnu.org/
19 *
20 */
21
22 /**
23 * Any element in the DOM tree of an HTML document.
24 * @ingroup DifferenceEngine
25 */
26 class Node {
27
28 public $parent;
29
30 protected $parentTree;
31
32 public $whiteBefore = false;
33
34 public $whiteAfter = false;
35
36 function __construct($parent) {
37 $this->parent = $parent;
38 }
39
40 public function getParentTree() {
41 if (!isset($this->parentTree)) {
42 if (!is_null($this->parent)) {
43 $this->parentTree = $this->parent->getParentTree();
44 $this->parentTree[] = $this->parent;
45 } else {
46 $this->parentTree = array();
47 }
48 }
49 return $this->parentTree;
50 }
51
52 public function getLastCommonParent(Node $other) {
53 $result = new LastCommonParentResult();
54
55 $myParents = $this->getParentTree();
56 $otherParents = $other->getParentTree();
57
58 $i = 1;
59 $isSame = true;
60 $nbMyParents = count($myParents);
61 $nbOtherParents = count($otherParents);
62 while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) {
63 if (!$myParents[$i]->openingTag === $otherParents[$i]->openingTag) {
64 $isSame = false;
65 } else {
66 // After a while, the index i-1 must be the last common parent
67 $i++;
68 }
69 }
70
71 $result->lastCommonParentDepth = $i - 1;
72 $result->parent = $myParents[$i - 1];
73
74 if (!$isSame || $nbMyParents > $nbOtherParents) {
75 // Not all tags matched, or all tags matched but
76 // there are tags left in this tree
77 $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($myParents[$i]);
78 $result->splittingNeeded = true;
79 } else if ($nbMyParents <= $nbOtherParents) {
80 $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($this);
81 }
82 return $result;
83 }
84
85 public function setParent($parent) {
86 $this->parent = $parent;
87 unset($this->parentTree);
88 }
89
90 public function inPre() {
91 $tree = $this->getParentTree();
92 foreach ($tree as &$ancestor) {
93 if ($ancestor->isPre()) {
94 return true;
95 }
96 }
97 return false;
98 }
99 }
100
101 /**
102 * Node that can contain other nodes. Represents an HTML tag.
103 * @ingroup DifferenceEngine
104 */
105 class TagNode extends Node {
106
107 public $children = array();
108
109 public $qName;
110
111 public $attributes = array();
112
113 public $openingTag;
114
115 function __construct($parent, $qName, /*array*/ $attributes) {
116 parent::__construct($parent);
117 $this->qName = strtolower($qName);
118 foreach($attributes as $key => &$value){
119 $this->attributes[strtolower($key)] = $value;
120 }
121 return $this->openingTag = Xml::openElement($this->qName, $this->attributes);
122 }
123
124 public function addChildAbsolute(Node $node, $index) {
125 array_splice($this->children, $index, 0, array($node));
126 }
127
128 public function getIndexOf(Node $child) {
129 // don't trust array_search with objects
130 foreach ($this->children as $key => &$value){
131 if ($value === $child) {
132 return $key;
133 }
134 }
135 return null;
136 }
137
138 public function getNbChildren() {
139 return count($this->children);
140 }
141
142 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
143 $nodes = array();
144
145 $allDeleted = false;
146 $somethingDeleted = false;
147 $hasNonDeletedDescendant = false;
148
149 if (empty($this->children)) {
150 return $nodes;
151 }
152
153 foreach ($this->children as &$child) {
154 $allDeleted_local = false;
155 $somethingDeleted_local = false;
156 $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local);
157 if ($somethingDeleted_local) {
158 $nodes = array_merge($nodes, $childrenChildren);
159 $somethingDeleted = true;
160 }
161 if (!$allDeleted_local) {
162 $hasNonDeletedDescendant = true;
163 }
164 }
165 if (!$hasNonDeletedDescendant) {
166 $nodes = array($this);
167 $allDeleted = true;
168 }
169 return $nodes;
170 }
171
172 public function splitUntil(TagNode $parent, Node $split, $includeLeft) {
173 $splitOccured = false;
174 if ($parent !== $this) {
175 $part1 = new TagNode(null, $this->qName, $this->attributes);
176 $part2 = new TagNode(null, $this->qName, $this->attributes);
177 $part1->setParent($this->parent);
178 $part2->setParent($this->parent);
179
180 $onSplit = false;
181 $pastSplit = false;
182 foreach ($this->children as &$child)
183 {
184 if ($child === $split) {
185 $onSplit = true;
186 }
187 if(!$pastSplit || ($onSplit && $includeLeft)) {
188 $child->setParent($part1);
189 $part1->children[] = $child;
190 } else {
191 $child->setParent($part2);
192 $part2->children[] = $child;
193 }
194 if ($onSplit) {
195 $onSplit = false;
196 $pastSplit = true;
197 }
198 }
199 $myindexinparent = $this->parent->getIndexOf($this);
200 if (!empty($part1->children)) {
201 $this->parent->addChildAbsolute($part1, $myindexinparent);
202 }
203 if (!empty($part2->children)) {
204 $this->parent->addChildAbsolute($part2, $myindexinparent);
205 }
206 if (!empty($part1->children) && !empty($part2->children)) {
207 $splitOccured = true;
208 }
209
210 $this->parent->removeChild($myindexinparent);
211
212 if ($includeLeft) {
213 $this->parent->splitUntil($parent, $part1, $includeLeft);
214 } else {
215 $this->parent->splitUntil($parent, $part2, $includeLeft);
216 }
217 }
218 return $splitOccured;
219
220 }
221
222 private function removeChild($index) {
223 unset($this->children[$index]);
224 $this->children = array_values($this->children);
225 }
226
227 public static $blocks = array('html', 'body','p','blockquote', 'h1',
228 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table',
229 'tbody', 'tr', 'td', 'th', 'br');
230
231 public function copyTree() {
232 $newThis = new TagNode(null, $this->qName, $this->attributes);
233 $newThis->whiteBefore = $this->whiteBefore;
234 $newThis->whiteAfter = $this->whiteAfter;
235 foreach ($this->children as &$child) {
236 $newChild = $child->copyTree();
237 $newChild->setParent($newThis);
238 $newThis->children[] = $newChild;
239 }
240 return $newThis;
241 }
242
243 public function getMatchRatio(TagNode $other) {
244 $txtComp = new TextOnlyComparator($other);
245 return $txtComp->getMatchRatio(new TextOnlyComparator($this));
246 }
247
248 public function expandWhiteSpace() {
249 $shift = 0;
250 $spaceAdded = false;
251
252 $nbOriginalChildren = $this->getNbChildren();
253 for ($i = 0; $i < $nbOriginalChildren; ++$i) {
254 $child = $this->children[$i + $shift];
255
256 if ($child instanceof TagNode) {
257 if (!$child->isPre()) {
258 $child->expandWhiteSpace();
259 }
260 }
261 if (!$spaceAdded && $child->whiteBefore) {
262 $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild());
263 $ws->setParent($this);
264 $this->addChildAbsolute($ws,$i + ($shift++));
265 }
266 if ($child->whiteAfter) {
267 $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild());
268 $ws->setParent($this);
269 $this->addChildAbsolute($ws,$i + 1 + ($shift++));
270 $spaceAdded = true;
271 } else {
272 $spaceAdded = false;
273 }
274
275 }
276 }
277
278 public function getLeftMostChild() {
279 if (empty($this->children)) {
280 return $this;
281 }
282 return $this->children[0]->getLeftMostChild();
283 }
284
285 public function getRightMostChild() {
286 if (empty($this->children)) {
287 return $this;
288 }
289 return $this->children[$this->getNbChildren() - 1]->getRightMostChild();
290 }
291
292 public function isPre() {
293 return 0 == strcasecmp($this->qName,'pre');
294 }
295
296 public static function toDiffLine(TagNode $node) {
297 return $node->openingTag;
298 }
299 }
300
301 /**
302 * Represents a piece of text in the HTML file.
303 * @ingroup DifferenceEngine
304 */
305 class TextNode extends Node {
306
307 public $text;
308
309 public $modification;
310
311 function __construct($parent, $text) {
312 parent::__construct($parent);
313 $this->modification = new Modification(Modification::NONE);
314 $this->text = $text;
315 }
316
317 public function copyTree() {
318 $clone = clone $this;
319 $clone->setParent(null);
320 return $clone;
321 }
322
323 public function getLeftMostChild() {
324 return $this;
325 }
326
327 public function getRightMostChild() {
328 return $this;
329 }
330
331 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
332 if ($this->modification->type == Modification::REMOVED
333 && $this->modification->id == $id){
334 $somethingDeleted = true;
335 $allDeleted = true;
336 return array($this);
337 }
338 return array();
339 }
340
341 public function isSameText($other) {
342 if (is_null($other) || ! $other instanceof TextNode) {
343 return false;
344 }
345 return str_replace('\n', ' ',$this->text) === str_replace('\n', ' ',$other->text);
346 }
347
348 public static function toDiffLine(TextNode $node) {
349 return str_replace('\n', ' ',$node->text);
350 }
351 }
352
353 /**
354 * @todo Document
355 * @ingroup DifferenceEngine
356 */
357 class WhiteSpaceNode extends TextNode {
358
359 function __construct($parent, $s, Node $like = null) {
360 parent::__construct($parent, $s);
361 if(!is_null($like) && $like instanceof TextNode) {
362 $newModification = clone $like->modification;
363 $newModification->firstOfID = false;
364 $this->modification = $newModification;
365 }
366 }
367 }
368
369 /**
370 * Represents the root of a HTML document.
371 * @ingroup DifferenceEngine
372 */
373 class BodyNode extends TagNode {
374
375 function __construct() {
376 parent::__construct(null, 'body', array());
377 }
378
379 public function copyTree() {
380 $newThis = new BodyNode();
381 foreach ($this->children as &$child) {
382 $newChild = $child->copyTree();
383 $newChild->setParent($newThis);
384 $newThis->children[] = $newChild;
385 }
386 return $newThis;
387 }
388
389 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
390 $nodes = array();
391 foreach ($this->children as &$child) {
392 $childrenChildren = $child->getMinimalDeletedSet($id,
393 $allDeleted, $somethingDeleted);
394 $nodes = array_merge($nodes, $childrenChildren);
395 }
396 return $nodes;
397 }
398
399 }
400
401 /**
402 * Represents an image in HTML. Even though images do not contain any text they
403 * are independent visible objects on the page. They are logically a TextNode.
404 * @ingroup DifferenceEngine
405 */
406 class ImageNode extends TextNode {
407
408 public $attributes;
409
410 function __construct(TagNode $parent, /*array*/ $attrs) {
411 if(!array_key_exists('src', $attrs)) {
412 HTMLDiffer::diffDebug( "Image without a source\n" );
413 parent::__construct($parent, '<img></img>');
414 }else{
415 parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
416 }
417 $this->attributes = $attrs;
418 }
419
420 public function isSameText($other) {
421 if (is_null($other) || ! $other instanceof ImageNode) {
422 return false;
423 }
424 return $this->text === $other->text;
425 }
426
427 }
428
429 /**
430 * No-op node
431 * @ingroup DifferenceEngine
432 */
433 class DummyNode extends Node {
434
435 function __construct() {
436 // no op
437 }
438
439 }