fix for html diff on page with images -- ImageNode $attributes member was incorrectly...
[lhc/web/wiklou.git] / includes / HTMLDiff.php
1 <?php
2 /* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * or see http://www.gnu.org/
18 */
19
20 /**
21 * Any element in the DOM tree of an HTML document.
22 */
23 class Node {
24
25 public $parent;
26
27 protected $parentTree;
28
29 public $whiteBefore = false;
30
31 public $whiteAfter = false;
32
33 function __construct($parent) {
34 $this->parent = $parent;
35 }
36
37 public function getParentTree() {
38 if (!isset($this->parentTree)) {
39 if (!is_null($this->parent)) {
40 $this->parentTree = $this->parent->getParentTree();
41 $this->parentTree[] = $this->parent;
42 } else {
43 $this->parentTree = array();
44 }
45 }
46 return $this->parentTree;
47 }
48
49 public function getLastCommonParent(Node $other) {
50 $result = new LastCommonParentResult();
51
52 $myParents = $this->getParentTree();
53 $otherParents = $other->getParentTree();
54
55 $i = 1;
56 $isSame = true;
57 $nbMyParents = count($myParents);
58 $nbOtherParents = count($otherParents);
59 while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) {
60 if (!$myParents[$i]->openingTag === $otherParents[$i]->openingTag) {
61 $isSame = false;
62 } else {
63 // After a while, the index i-1 must be the last common parent
64 $i++;
65 }
66 }
67
68 $result->lastCommonParentDepth = $i - 1;
69 $result->parent = $myParents[$i - 1];
70
71 if (!$isSame || $nbMyParents > $nbOtherParents) {
72 // Not all tags matched, or all tags matched but
73 // there are tags left in this tree
74 $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($myParents[$i]);
75 $result->splittingNeeded = true;
76 } else if ($nbMyParents <= $nbOtherParents) {
77 $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($this);
78 }
79 return $result;
80 }
81
82 public function setParent($parent) {
83 $this->parent = $parent;
84 unset($this->parentTree);
85 }
86
87 public function inPre() {
88 $tree = $this->getParentTree();
89 foreach ($tree as &$ancestor) {
90 if ($ancestor->isPre()) {
91 return true;
92 }
93 }
94 return false;
95 }
96 }
97
98 /**
99 * Node that can contain other nodes. Represents an HTML tag.
100 */
101 class TagNode extends Node {
102
103 public $children = array();
104
105 public $qName;
106
107 public $attributes = array();
108
109 public $openingTag;
110
111 function __construct($parent, $qName, /*array*/ $attributes) {
112 parent::__construct($parent);
113 $this->qName = strtolower($qName);
114 foreach($attributes as $key => &$value){
115 $this->attributes[strtolower($key)] = $value;
116 }
117 return $this->openingTag = Xml::openElement($this->qName, $this->attributes);
118 }
119
120 public function addChildAbsolute(Node $node, $index) {
121 array_splice($this->children, $index, 0, array($node));
122 }
123
124 public function getIndexOf(Node $child) {
125 // don't trust array_search with objects
126 foreach ($this->children as $key => &$value){
127 if ($value === $child) {
128 return $key;
129 }
130 }
131 return null;
132 }
133
134 public function getNbChildren() {
135 return count($this->children);
136 }
137
138 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
139 $nodes = array();
140
141 $allDeleted = false;
142 $somethingDeleted = false;
143 $hasNonDeletedDescendant = false;
144
145 if (empty($this->children)) {
146 return $nodes;
147 }
148
149 foreach ($this->children as &$child) {
150 $allDeleted_local = false;
151 $somethingDeleted_local = false;
152 $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local);
153 if ($somethingDeleted_local) {
154 $nodes = array_merge($nodes, $childrenChildren);
155 $somethingDeleted = true;
156 }
157 if (!$allDeleted_local) {
158 $hasNonDeletedDescendant = true;
159 }
160 }
161 if (!$hasNonDeletedDescendant) {
162 $nodes = array($this);
163 $allDeleted = true;
164 }
165 return $nodes;
166 }
167
168 public function splitUntil(TagNode $parent, Node $split, $includeLeft) {
169 $splitOccured = false;
170 if ($parent !== $this) {
171 $part1 = new TagNode(null, $this->qName, $this->attributes);
172 $part2 = new TagNode(null, $this->qName, $this->attributes);
173 $part1->setParent($this->parent);
174 $part2->setParent($this->parent);
175
176 $onSplit = false;
177 $pastSplit = false;
178 foreach ($this->children as &$child)
179 {
180 if ($child === $split) {
181 $onSplit = true;
182 }
183 if(!$pastSplit || ($onSplit && $includeLeft)) {
184 $child->setParent($part1);
185 $part1->children[] = $child;
186 } else {
187 $child->setParent($part2);
188 $part2->children[] = $child;
189 }
190 if ($onSplit) {
191 $onSplit = false;
192 $pastSplit = true;
193 }
194 }
195 $myindexinparent = $this->parent->getIndexOf($this);
196 if (!empty($part1->children)) {
197 $this->parent->addChildAbsolute($part1, $myindexinparent);
198 }
199 if (!empty($part2->children)) {
200 $this->parent->addChildAbsolute($part2, $myindexinparent);
201 }
202 if (!empty($part1->children) && !empty($part2->children)) {
203 $splitOccured = true;
204 }
205
206 $this->parent->removeChild($myindexinparent);
207
208 if ($includeLeft) {
209 $this->parent->splitUntil($parent, $part1, $includeLeft);
210 } else {
211 $this->parent->splitUntil($parent, $part2, $includeLeft);
212 }
213 }
214 return $splitOccured;
215
216 }
217
218 private function removeChild($index) {
219 unset($this->children[$index]);
220 $this->children = array_values($this->children);
221 }
222
223 public static $blocks = array('html', 'body','p','blockquote', 'h1',
224 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table',
225 'tbody', 'tr', 'td', 'th', 'br');
226
227 public function copyTree() {
228 $newThis = new TagNode(null, $this->qName, $this->attributes);
229 $newThis->whiteBefore = $this->whiteBefore;
230 $newThis->whiteAfter = $this->whiteAfter;
231 foreach ($this->children as &$child) {
232 $newChild = $child->copyTree();
233 $newChild->setParent($newThis);
234 $newThis->children[] = $newChild;
235 }
236 return $newThis;
237 }
238
239 public function getMatchRatio(TagNode $other) {
240 $txtComp = new TextOnlyComparator($other);
241 return $txtComp->getMatchRatio(new TextOnlyComparator($this));
242 }
243
244 public function expandWhiteSpace() {
245 $shift = 0;
246 $spaceAdded = false;
247
248 $nbOriginalChildren = $this->getNbChildren();
249 for ($i = 0; $i < $nbOriginalChildren; ++$i) {
250 $child = $this->children[$i + $shift];
251
252 if ($child instanceof TagNode) {
253 if (!$child->isPre()) {
254 $child->expandWhiteSpace();
255 }
256 }
257 if (!$spaceAdded && $child->whiteBefore) {
258 $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild());
259 $ws->setParent($this);
260 $this->addChildAbsolute($ws,$i + ($shift++));
261 }
262 if ($child->whiteAfter) {
263 $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild());
264 $ws->setParent($this);
265 $this->addChildAbsolute($ws,$i + 1 + ($shift++));
266 $spaceAdded = true;
267 } else {
268 $spaceAdded = false;
269 }
270
271 }
272 }
273
274 public function getLeftMostChild() {
275 if (empty($this->children)) {
276 return $this;
277 }
278 return $this->children[0]->getLeftMostChild();
279 }
280
281 public function getRightMostChild() {
282 if (empty($this->children)) {
283 return $this;
284 }
285 return $this->children[$this->getNbChildren() - 1]->getRightMostChild();
286 }
287
288 public function isPre() {
289 return 0 == strcasecmp($this->qName,'pre');
290 }
291
292 public static function toDiffLine(TagNode $node) {
293 return $node->openingTag;
294 }
295 }
296
297 /**
298 * Represents a piece of text in the HTML file.
299 */
300 class TextNode extends Node {
301
302 public $text;
303
304 public $modification;
305
306 function __construct($parent, $text) {
307 parent::__construct($parent);
308 $this->modification = new Modification(Modification::NONE);
309 $this->text = $text;
310 }
311
312 public function copyTree() {
313 $clone = clone $this;
314 $clone->setParent(null);
315 return $clone;
316 }
317
318 public function getLeftMostChild() {
319 return $this;
320 }
321
322 public function getRightMostChild() {
323 return $this;
324 }
325
326 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
327 if ($this->modification->type == Modification::REMOVED
328 && $this->modification->id == $id){
329 $somethingDeleted = true;
330 $allDeleted = true;
331 return array($this);
332 }
333 return array();
334 }
335
336 public function isSameText($other) {
337 if (is_null($other) || ! $other instanceof TextNode) {
338 return false;
339 }
340 return str_replace('\n', ' ',$this->text) === str_replace('\n', ' ',$other->text);
341 }
342
343 public static function toDiffLine(TextNode $node) {
344 return str_replace('\n', ' ',$node->text);
345 }
346 }
347
348 class WhiteSpaceNode extends TextNode {
349
350 function __construct($parent, $s, Node $like = null) {
351 parent::__construct($parent, $s);
352 if(!is_null($like) && $like instanceof TextNode) {
353 $newModification = clone $like->modification;
354 $newModification->firstOfID = false;
355 $this->modification = $newModification;
356 }
357 }
358 }
359
360 /**
361 * Represents the root of a HTML document.
362 */
363 class BodyNode extends TagNode {
364
365 function __construct() {
366 parent::__construct(null, 'body', array());
367 }
368
369 public function copyTree() {
370 $newThis = new BodyNode();
371 foreach ($this->children as &$child) {
372 $newChild = $child->copyTree();
373 $newChild->setParent($newThis);
374 $newThis->children[] = $newChild;
375 }
376 return $newThis;
377 }
378
379 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
380 $nodes = array();
381 foreach ($this->children as &$child) {
382 $childrenChildren = $child->getMinimalDeletedSet($id,
383 $allDeleted, $somethingDeleted);
384 $nodes = array_merge($nodes, $childrenChildren);
385 }
386 return $nodes;
387 }
388
389 }
390
391 /**
392 * Represents an image in HTML. Even though images do not contain any text they
393 * are independent visible objects on the page. They are logically a TextNode.
394 */
395 class ImageNode extends TextNode {
396
397 public $attributes;
398
399 function __construct(TagNode $parent, /*array*/ $attrs) {
400 if(!array_key_exists('src', $attrs)) {
401 //wfDebug('Image without a source:');
402 //foreach ($attrs as $key => &$value) {
403 //wfDebug("$key = $value");
404 //}
405 parent::__construct($parent, '<img></img>');
406 }else{
407 parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
408 }
409 $this->attributes = $attrs;
410 }
411
412 public function isSameText($other) {
413 if (is_null($other) || ! $other instanceof ImageNode) {
414 return false;
415 }
416 return $this->text === $other->text;
417 }
418
419 }
420
421 class DummyNode extends Node {
422
423 function __construct() {
424 // no op
425 }
426
427 }
428
429 /**
430 * When detecting the last common parent of two nodes, all results are stored as
431 * a LastCommonParentResult.
432 */
433 class LastCommonParentResult {
434
435 // Parent
436 public $parent;
437
438 // Splitting
439 public $splittingNeeded = false;
440
441 // Depth
442 public $lastCommonParentDepth = -1;
443
444 // Index
445 public $indexInLastCommonParent = -1;
446 }
447
448 class Modification{
449
450 const NONE = 1;
451 const REMOVED = 2;
452 const ADDED = 4;
453 const CHANGED = 8;
454
455 public $type;
456
457 public $id = -1;
458
459 public $prevMod;
460
461 public $nextMod;
462
463 public $firstOfID = false;
464
465 public $changes;
466
467 function __construct($type) {
468 $this->type = $type;
469 }
470
471 public static function typeToString($type) {
472 switch($type) {
473 case self::NONE: return 'none';
474 case self::REMOVED: return 'removed';
475 case self::ADDED: return 'added';
476 case self::CHANGED: return 'changed';
477 }
478 }
479 }
480
481 class DomTreeBuilder {
482
483 public $textNodes = array();
484
485 public $bodyNode;
486
487 private $currentParent;
488
489 private $newWord = "";
490
491 protected $bodyStarted = false;
492
493 protected $bodyEnded = false;
494
495 private $whiteSpaceBeforeThis = false;
496
497 private $lastSibling;
498
499 private $notInPre = true;
500
501 function __construct() {
502 $this->bodyNode = $this->currentParent = new BodyNode();
503 $this->lastSibling = new DummyNode();
504 }
505
506 /**
507 * Must be called manually
508 */
509 public function endDocument() {
510 $this->endWord();
511 //wfDebug(count($this->textNodes) . ' text nodes in document.');
512 }
513
514 public function startElement($parser, $name, /*array*/ $attributes) {
515 if (strcasecmp($name, 'body') != 0) {
516 //wfDebug("Starting $name node.");
517 $this->endWord();
518
519 $newNode = new TagNode($this->currentParent, $name, $attributes);
520 $this->currentParent->children[] = $newNode;
521 $this->currentParent = $newNode;
522 $this->lastSibling = new DummyNode();
523 if ($this->whiteSpaceBeforeThis && !in_array(strtolower($this->currentParent->qName),TagNode::$blocks)) {
524 $this->currentParent->whiteBefore = true;
525 }
526 $this->whiteSpaceBeforeThis = false;
527 if(strcasecmp($name, 'pre') == 0) {
528 $this->notInPre = false;
529 }
530 }
531 }
532
533 public function endElement($parser, $name) {
534 if(strcasecmp($name, 'body') != 0) {
535 //wfDebug("Ending $name node.");
536 if (0 == strcasecmp($name,'img')) {
537 // Insert a dummy leaf for the image
538 $img = new ImageNode($this->currentParent, $this->currentParent->attributes);
539 $this->currentParent->children[] = $img;
540 $img->whiteBefore = $this->whiteSpaceBeforeThis;
541 $this->lastSibling = $img;
542 $this->textNodes[] = $img;
543 }
544 $this->endWord();
545 if (!in_array(strtolower($this->currentParent->qName),TagNode::$blocks)) {
546 $this->lastSibling = $this->currentParent;
547 } else {
548 $this->lastSibling = new DummyNode();
549 }
550 $this->currentParent = $this->currentParent->parent;
551 $this->whiteSpaceBeforeThis = false;
552 if (!$this->notInPre && strcasecmp($name, 'pre') == 0) {
553 $this->notInPre = true;
554 }
555 } else {
556 $this->endDocument();
557 }
558 }
559
560 const regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1})/';
561 const whitespace = '/^[\s]{1}$/';
562 const delimiter = '/^[\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1}$/';
563
564 public function characters($parser, $data) {
565 $matches = preg_split(self::regex, $data, -1, PREG_SPLIT_DELIM_CAPTURE);
566
567 foreach($matches as &$word) {
568 if (preg_match(self::whitespace, $word) && $this->notInPre) {
569 $this->endWord();
570 $this->lastSibling->whiteAfter = true;
571 $this->whiteSpaceBeforeThis = true;
572 } else if (preg_match(self::delimiter, $word)) {
573 $this->endWord();
574 $textNode = new TextNode($this->currentParent, $word);
575 $this->currentParent->children[] = $textNode;
576 $textNode->whiteBefore = $this->whiteSpaceBeforeThis;
577 $this->whiteSpaceBeforeThis = false;
578 $this->lastSibling = $textNode;
579 $this->textNodes[] = $textNode;
580 } else {
581 $this->newWord .= $word;
582 }
583 }
584 }
585
586 private function endWord() {
587 if (!empty($this->newWord)) {
588 $node = new TextNode($this->currentParent, $this->newWord);
589 $this->currentParent->children[] = $node;
590 $node->whiteBefore = $this->whiteSpaceBeforeThis;
591 $this->whiteSpaceBeforeThis = false;
592 $this->lastSibling = $node;
593 $this->textNodes[] = $node;
594 $this->newWord = "";
595 }
596 }
597
598 public function getDiffLines() {
599 return array_map(array('TextNode','toDiffLine'), $this->textNodes);
600 }
601 }
602
603 class TextNodeDiffer {
604
605 private $textNodes;
606 public $bodyNode;
607
608 private $oldTextNodes;
609 private $oldBodyNode;
610
611 private $lastModified = array();
612
613 private $newID = 0;
614
615 private $changedID = 0;
616
617 private $changedIDUsed = false;
618
619 // used to remove the whitespace between a red and green block
620 private $whiteAfterLastChangedPart = false;
621
622 private $deletedID = 0;
623
624 function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
625 $this->textNodes = $tree->textNodes;
626 $this->bodyNode = $tree->bodyNode;
627 $this->oldTextNodes = $oldTree->textNodes;
628 $this->oldBodyNode = $oldTree->bodyNode;
629 }
630
631 public function markAsNew($start, $end) {
632 if ($end <= $start) {
633 return;
634 }
635
636 if ($this->whiteAfterLastChangedPart) {
637 $this->textNodes[$start]->whiteBefore = false;
638 }
639
640 $nextLastModified = array();
641
642 for ($i = $start; $i < $end; ++$i) {
643 $mod = new Modification(Modification::ADDED);
644 $mod->id = $this->newID;
645 if (count($this->lastModified) > 0) {
646 $mod->prevMod = $this->lastModified[0];
647 if (is_null($this->lastModified[0]->nextMod )) {
648 foreach ($this->lastModified as &$lastMod) {
649 $lastMod->nextMod = $mod;
650 }
651 }
652 }
653 $nextLastModified[] = $mod;
654 $this->textNodes[$i]->modification = $mod;
655 }
656 if ($start < $end) {
657 $this->textNodes[$start]->modification->firstOfID = true;
658 }
659 ++$this->newID;
660 $this->lastModified = $nextLastModified;
661 }
662
663 public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
664 $i = $rightstart;
665 $j = $leftstart;
666
667 if ($this->changedIDUsed) {
668 ++$this->changedID;
669 $this->changedIDUsed = false;
670 }
671
672 $nextLastModified = array();
673
674 $changes;
675 while ($i < $rightend) {
676 $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
677 $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
678 $result = $acthis->getResult($acother);
679 unset($acthis, $acother);
680
681 $nbLastModified = count($this->lastModified);
682 if ($result->changed) {
683 $mod = new Modification(Modification::CHANGED);
684
685 if (!$this->changedIDUsed) {
686 $mod->firstOfID = true;
687 if (count($nextLastModified) > 0) {
688 $this->lastModified = $nextLastModified;
689 $nextLastModified = array();
690 }
691 } else if (!is_null($result->changes) && $result->changes !== $this->changes) {
692 ++$this->changedID;
693 $mod->firstOfID = true;
694 if (count($nextLastModified) > 0) {
695 $this->lastModified = $nextLastModified;
696 $nextLastModified = array();
697 }
698 }
699
700 if ($nbLastModified > 0) {
701 $mod->prevMod = $this->lastModified[0];
702 if (is_null($this->lastModified[0]->nextMod )) {
703 foreach ($this->lastModified as &$lastMod) {
704 $lastMod->nextMod = $mod;
705 }
706 }
707 }
708 $nextLastModified[] = $mod;
709
710 $mod->changes = $result->changes;
711 $mod->id = $this->changedID;
712
713 $this->textNodes[$i]->modification = $mod;
714 $this->changes = $result->changes;
715 $this->changedIDUsed = true;
716 } else if ($this->changedIDUsed) {
717 ++$this->changedID;
718 $this->changedIDUsed = false;
719 }
720 ++$i;
721 ++$j;
722 }
723 if (count($nextLastModified) > 0) {
724 $this->lastModified = $nextLastModified;
725 }
726 }
727
728 public function markAsDeleted($start, $end, $before) {
729
730 if ($end <= $start) {
731 return;
732 }
733
734 if ($before > 0 && $this->textNodes[$before - 1]->whiteAfter) {
735 $this->whiteAfterLastChangedPart = true;
736 } else {
737 $this->whiteAfterLastChangedPart = false;
738 }
739
740 $nextLastModified = array();
741
742 for ($i = $start; $i < $end; ++$i) {
743 $mod = new Modification(Modification::REMOVED);
744 $mod->id = $this->deletedID;
745 if (count($this->lastModified) > 0) {
746 $mod->prevMod = $this->lastModified[0];
747 if (is_null($this->lastModified[0]->nextMod )) {
748 foreach ($this->lastModified as &$lastMod) {
749 $lastMod->nextMod = $mod;
750 }
751 }
752 }
753 $nextLastModified[] = $mod;
754
755 // oldTextNodes is used here because we're going to move its deleted
756 // elements to this tree!
757 $this->oldTextNodes[$i]->modification = $mod;
758 }
759 $this->oldTextNodes[$start]->modification->firstOfID = true;
760
761 $root = $this->oldTextNodes[$start]->getLastCommonParent($this->oldTextNodes[$end-1])->parent;
762
763 $junk1 = $junk2 = null;
764 $deletedNodes = $root->getMinimalDeletedSet($this->deletedID, $junk1, $junk2);
765
766 //wfDebug("Minimal set of deleted nodes of size " . count($deletedNodes));
767
768 // Set prevLeaf to the leaf after which the old HTML needs to be
769 // inserted
770 if ($before > 0) {
771 $prevLeaf = $this->textNodes[$before - 1];
772 }
773 // Set nextLeaf to the leaf before which the old HTML needs to be
774 // inserted
775 if ($before < count($this->textNodes)) {
776 $nextLeaf = $this->textNodes[$before];
777 }
778
779 while (count($deletedNodes) > 0) {
780 if (isset($prevLeaf)) {
781 $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
782 } else {
783 $prevResult = new LastCommonParentResult();
784 $prevResult->parent = $this->bodyNode;
785 $prevResult->indexInLastCommonParent = 0;
786 }
787 if (isset($nextleaf)) {
788 $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[count($deletedNodes) - 1]);
789 } else {
790 $nextResult = new LastCommonParentResult();
791 $nextResult->parent = $this->bodyNode;
792 $nextResult->indexInLastCommonParent = $this->bodyNode->getNbChildren();
793 }
794
795 if ($prevResult->lastCommonParentDepth == $nextResult->lastCommonParentDepth) {
796 // We need some metric to choose which way to add-...
797 if ($deletedNodes[0]->parent === $deletedNodes[count($deletedNodes) - 1]->parent
798 && $prevResult->parent === $nextResult->parent) {
799 // The difference is not in the parent
800 $prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
801 } else {
802 // The difference is in the parent, so compare them
803 // now THIS is tricky
804 $distancePrev = $deletedNodes[0]->parent->getMatchRatio($prevResult->parent);
805 $distanceNext = $deletedNodes[count($deletedNodes) - 1]->parent->getMatchRatio($nextResult->parent);
806
807 if ($distancePrev <= $distanceNext) {
808 $prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
809 } else {
810 $nextResult->lastCommonParentDepth = $nextResult->lastCommonParentDepth + 1;
811 }
812 }
813
814 }
815
816 if ($prevResult->lastCommonParentDepth > $nextResult->lastCommonParentDepth) {
817 // Inserting at the front
818 if ($prevResult->splittingNeeded) {
819 $prevLeaf->parent->splitUntil($prevResult->parent, $prevLeaf, true);
820 }
821 $prevLeaf = $deletedNodes[0]->copyTree();
822 unset($deletedNodes[0]);
823 $deletedNodes = array_values($deletedNodes);
824 $prevLeaf->setParent($prevResult->parent);
825 $prevResult->parent->addChildAbsolute($prevLeaf,$prevResult->indexInLastCommonParent + 1);
826 } else if ($prevResult->lastCommonParentDepth < $nextResult->lastCommonParentDepth) {
827 // Inserting at the back
828 if ($nextResult->splittingNeeded) {
829 $splitOccured = $nextLeaf->parent->splitUntil($nextResult->parent, $nextLeaf, false);
830 if ($splitOccured) {
831 // The place where to insert is shifted one place to the
832 // right
833 $nextResult->indexInLastCommonParent = $nextResult->indexInLastCommonParent + 1;
834 }
835 }
836 $nextLeaf = $deletedNodes[count(deletedNodes) - 1]->copyTree();
837 unset($deletedNodes[count(deletedNodes) - 1]);
838 $deletedNodes = array_values($deletedNodes);
839 $nextLeaf->setParent($nextResult->parent);
840 $nextResult->parent->addChildAbsolute($nextLeaf,$nextResult->indexInLastCommonParent);
841 } else {
842 throw new Exception("Uh?");
843 }
844 }
845 $this->lastModified = $nextLastModified;
846 ++$this->deletedID;
847 }
848
849 public function expandWhiteSpace() {
850 $this->bodyNode->expandWhiteSpace();
851 }
852
853 public function lengthNew(){
854 return count($this->textNodes);
855 }
856
857 public function lengthOld(){
858 return count($this->oldTextNodes);
859 }
860 }
861
862 class HTMLDiffer {
863
864 private $output;
865
866 function __construct($output) {
867 $this->output = $output;
868 }
869
870 function htmlDiff($from, $to) {
871 wfProfileIn( __METHOD__ );
872 // Create an XML parser
873 $xml_parser = xml_parser_create('');
874
875 $domfrom = new DomTreeBuilder();
876
877 // Set the functions to handle opening and closing tags
878 xml_set_element_handler($xml_parser, array($domfrom, "startElement"), array($domfrom, "endElement"));
879
880 // Set the function to handle blocks of character data
881 xml_set_character_data_handler($xml_parser, array($domfrom, "characters"));
882
883 //wfDebug('Parsing '.strlen($from)." characters worth of HTML\n");
884 if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', false)
885 || !xml_parse($xml_parser, $from, false)
886 || !xml_parse($xml_parser, '</body>', true)){
887 $error = xml_error_string(xml_get_error_code($xml_parser));
888 $line = xml_get_current_line_number($xml_parser);
889 wfDebug("XML error: $error at line $line\n");
890 }
891 xml_parser_free($xml_parser);
892 unset($from);
893
894 $xml_parser = xml_parser_create('');
895
896 $domto = new DomTreeBuilder();
897
898 // Set the functions to handle opening and closing tags
899 xml_set_element_handler($xml_parser, array($domto, "startElement"), array($domto, "endElement"));
900
901 // Set the function to handle blocks of character data
902 xml_set_character_data_handler($xml_parser, array($domto, "characters"));
903
904 //wfDebug('Parsing '.strlen($to)." characters worth of HTML\n");
905 if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', false)
906 || !xml_parse($xml_parser, $to, false)
907 || !xml_parse($xml_parser, '</body>', true)){
908 $error = xml_error_string(xml_get_error_code($xml_parser));
909 $line = xml_get_current_line_number($xml_parser);
910 wfDebug("XML error: $error at line $line\n");
911 }
912 xml_parser_free($xml_parser);
913 unset($to);
914
915 $diffengine = new WikiDiff3();
916 $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
917 unset($xml_parser, $diffengine);
918
919 $domdiffer = new TextNodeDiffer($domto, $domfrom);
920
921 $currentIndexLeft = 0;
922 $currentIndexRight = 0;
923 foreach ($differences as &$d) {
924 if ($d->leftstart > $currentIndexLeft) {
925 $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
926 $currentIndexRight, $d->rightstart);
927 }
928 if ($d->leftlength > 0) {
929 $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
930 }
931 $domdiffer->markAsNew($d->rightstart, $d->rightend);
932
933 $currentIndexLeft = $d->leftend;
934 $currentIndexRight = $d->rightend;
935 }
936 $oldLength = $domdiffer->lengthOld();
937 if ($currentIndexLeft < $oldLength) {
938 $domdiffer->handlePossibleChangedPart($currentIndexLeft, $oldLength, $currentIndexRight, $domdiffer->lengthNew());
939 }
940 $domdiffer->expandWhiteSpace();
941 $output = new HTMLOutput('htmldiff', $this->output);
942 $output->parse($domdiffer->bodyNode);
943 wfProfileOut( __METHOD__ );
944 }
945
946 private function preProcess(/*array*/ $differences) {
947 $newRanges = array();
948
949 $nbDifferences = count($differences);
950 for ($i = 0; $i < $nbDifferences; ++$i) {
951 $leftStart = $differences[$i]->leftstart;
952 $leftEnd = $differences[$i]->leftend;
953 $rightStart = $differences[$i]->rightstart;
954 $rightEnd = $differences[$i]->rightend;
955
956 $leftLength = $leftEnd - $leftStart;
957 $rightLength = $rightEnd - $rightStart;
958
959 while ($i + 1 < $nbDifferences && self::score($leftLength,
960 $differences[$i + 1]->leftlength,
961 $rightLength,
962 $differences[$i + 1]->rightlength)
963 > ($differences[$i + 1]->leftstart - $leftEnd)) {
964 $leftEnd = $differences[$i + 1]->leftend;
965 $rightEnd = $differences[$i + 1]->rightend;
966 $leftLength = $leftEnd - $leftStart;
967 $rightLength = $rightEnd - $rightStart;
968 ++$i;
969 }
970 $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
971 }
972 return $newRanges;
973 }
974
975 /**
976 * Heuristic to merge differences for readability.
977 */
978 public static function score($ll, $nll, $rl, $nrl) {
979 if (($ll == 0 && $nll == 0)
980 || ($rl == 0 && $nrl == 0)) {
981 return 0;
982 }
983 $numbers = array($ll, $nll, $rl, $nrl);
984 $d = 0;
985 foreach ($numbers as &$number) {
986 while ($number > 3) {
987 $d += 3;
988 $number -= 3;
989 $number *= 0.5;
990 }
991 $d += $number;
992
993 }
994 return $d / (1.5 * count($numbers));
995 }
996
997 }
998
999 class TextOnlyComparator {
1000
1001 public $leafs = array();
1002
1003 function _construct(TagNode $tree) {
1004 $this->addRecursive($tree);
1005 $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
1006 }
1007
1008 private function addRecursive(TagNode $tree) {
1009 foreach ($tree->children as &$child) {
1010 if ($child instanceof TagNode) {
1011 $this->addRecursive($child);
1012 } else if ($child instanceof TextNode) {
1013 $this->leafs[] = $node;
1014 }
1015 }
1016 }
1017
1018 public function getMatchRatio(TextOnlyComparator $other) {
1019 $nbOthers = count($other->leafs);
1020 $nbThis = count($this->leafs);
1021 if($nbOthers == 0 || $nbThis == 0){
1022 return -log(0);
1023 }
1024
1025 $diffengine = new WikiDiff3(25000, 1.35);
1026 $diffengine->diff($this->leafs, $other->leafs);
1027
1028 $lcsLength = $diffengine->getLcsLength();
1029
1030 $distanceThis = $nbThis-$lcsLength;
1031
1032 return (2.0 - $lcsLength/$nbOthers - $lcsLength/$nbThis) / 2.0;
1033 }
1034 }
1035
1036 class AncestorComparatorResult {
1037
1038 public $changed = false;
1039
1040 public $changes = "";
1041 }
1042
1043 /**
1044 * A comparator used when calculating the difference in ancestry of two Nodes.
1045 */
1046 class AncestorComparator {
1047
1048 public $ancestors;
1049 public $ancestorsText;
1050
1051 function __construct(/*array*/ $ancestors) {
1052 $this->ancestors = $ancestors;
1053 $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
1054 }
1055
1056 public $compareTxt = "";
1057
1058 public function getResult(AncestorComparator $other) {
1059 $result = new AncestorComparatorResult();
1060
1061 $diffengine = new WikiDiff3(10000, 1.35);
1062 $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText);
1063
1064 if (count($differences) == 0){
1065 return $result;
1066 }
1067 $changeTxt = new ChangeTextGenerator($this, $other);
1068
1069 $result->changed = true;
1070 $result->changes = $changeTxt->getChanged($differences)->toString();
1071
1072 return $result;
1073 }
1074 }
1075
1076 class ChangeTextGenerator {
1077
1078 private $new;
1079 private $old;
1080
1081 private $factory;
1082
1083 function __construct(AncestorComparator $old, AncestorComparator $new) {
1084 $this->new = $new;
1085 $this->old = $old;
1086 $this->factory = new TagToStringFactory();
1087 }
1088
1089 public function getChanged(/*array*/ $differences) {
1090 $txt = new ChangeText;
1091
1092 $rootlistopened = false;
1093
1094 if (count($differences) > 1) {
1095 $txt->addHtml('<ul class="changelist">');
1096 $rootlistopened = true;
1097 }
1098
1099 $nbDifferences = count($differences);
1100 for ($j = 0; $j < $nbDifferences; ++$j) {
1101 $d = $differences[$j];
1102
1103 $lvl1listopened = false;
1104
1105 if ($rootlistopened) {
1106 $txt->addHtml('<li>');
1107 }
1108
1109 if ($d->leftlength + $d->rightlength > 1) {
1110 $txt->addHtml('<ul class="changelist">');
1111 $lvl1listopened = true;
1112 }
1113
1114 // left are the old ones
1115 for ($i = $d->leftstart; $i < $d->leftend; ++$i) {
1116 if ($lvl1listopened){
1117 $txt->addHtml('<li>');
1118 }
1119 // add a bullet for a old tag
1120 $this->addTagOld($txt, $this->old->ancestors[$i]);
1121
1122 if ($lvl1listopened){
1123 $txt->addHtml('</li>');
1124 }
1125 }
1126
1127 // right are the new ones
1128 for ($i = $d->rightstart; $i < $d->rightend; ++$i) {
1129 if ($lvl1listopened){
1130 $txt->addHtml('<li>');
1131 }
1132
1133 // add a bullet for a new tag
1134 $this->addTagNew($txt, $this->new->ancestors[$i]);
1135
1136 if ($lvl1listopened){
1137 $txt->addHtml('</li>');
1138 }
1139
1140 }
1141
1142 if ($lvl1listopened) {
1143 $txt->addHtml('</ul>');
1144 }
1145
1146 if ($rootlistopened) {
1147 $txt->addHtml('</li>');
1148 }
1149 }
1150
1151 if ($rootlistopened) {
1152 $txt->addHtml('</ul>');
1153 }
1154
1155 return $txt;
1156
1157 }
1158
1159 private function addTagOld(ChangeText $txt, TagNode $ancestor) {
1160 $this->factory->create($ancestor)->getRemovedDescription($txt);
1161 }
1162
1163 private function addTagNew(ChangeText $txt, TagNode $ancestor) {
1164 $this->factory->create($ancestor)->getAddedDescription($txt);
1165 }
1166 }
1167
1168 class ChangeText {
1169
1170 private $txt = "";
1171
1172 const newLine = "<br/>";
1173
1174 public function addText($s) {
1175 $s = $this->clean($s);
1176 $this->txt .= $s;
1177 }
1178
1179 public function addHtml($s) {
1180 $this->txt .= $s;
1181 }
1182
1183 public function addNewLine() {
1184 $this->addHtml(self::newLine);
1185 }
1186
1187 public function toString() {
1188 return $this->txt;
1189 }
1190
1191 private function clean($s) {
1192 return htmlspecialchars($s);
1193 }
1194 }
1195
1196 class TagToStringFactory {
1197
1198 private static $containerTags = array('html', 'body', 'p', 'blockquote',
1199 'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
1200 'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
1201 'dt', 'dd', 'input', 'form', 'img', 'span', 'a');
1202
1203 private static $styleTags = array('i', 'b', 'strong', 'em', 'font',
1204 'big', 'del', 'tt', 'sub', 'sup', 'strike');
1205
1206 const MOVED = 1;
1207 const STYLE = 2;
1208 const UNKNOWN = 4;
1209
1210 public function create(TagNode $node) {
1211 $sem = $this->getChangeSemantic($node->qName);
1212 if (strcasecmp($node->qName,'a') == 0) {
1213 return new AnchorToString($node, $sem);
1214 }
1215 if (strcasecmp($node->qName,'img') == 0) {
1216 return new NoContentTagToString($node, $sem);
1217 }
1218 return new TagToString($node, $sem);
1219 }
1220
1221 protected function getChangeSemantic($qname) {
1222 if (in_array(strtolower($qname),self::$containerTags)) {
1223 return self::MOVED;
1224 }
1225 if (in_array(strtolower($qname),self::$styleTags)) {
1226 return self::STYLE;
1227 }
1228 return self::UNKNOWN;
1229 }
1230 }
1231
1232 class TagToString {
1233
1234 protected $node;
1235
1236 protected $sem;
1237
1238 function __construct(TagNode $node, $sem) {
1239 $this->node = $node;
1240 $this->sem = $sem;
1241 }
1242
1243 public function getDescription() {
1244 return $this->getString('diff-' . $this->node->qName);
1245 }
1246
1247 public function getRemovedDescription(ChangeText $txt) {
1248 if ($this->sem == TagToStringFactory::MOVED) {
1249 $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' ');
1250 $txt->addHtml('<b>');
1251 $txt->addText(strtolower($this->getDescription()));
1252 $txt->addHtml('</b>');
1253 } else if ($this->sem == TagToStringFactory::STYLE) {
1254 $txt->addHtml('<b>');
1255 $txt->addText($this->getDescription());
1256 $txt->addHtml('</b>');
1257 $txt->addText(' ' . strtolower($this->getStyleRemoved()));
1258 } else {
1259 $txt->addHtml('<b>');
1260 $txt->addText($this->getDescription());
1261 $txt->addHtml('</b>');
1262 $txt->addText(' ' . strtolower($this->getRemoved()));
1263 }
1264 $this->addAttributes($txt, $this->node->attributes);
1265 $txt->addText('.');
1266 }
1267
1268 public function getAddedDescription(ChangeText $txt) {
1269 if ($this->sem == TagToStringFactory::MOVED) {
1270 $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' ');
1271 $txt->addHtml('<b>');
1272 $txt->addText(strtolower($this->getDescription()));
1273 $txt->addHtml('</b>');
1274 } else if ($this->sem == TagToStringFactory::STYLE) {
1275 $txt->addHtml('<b>');
1276 $txt->addText($this->getDescription());
1277 $txt->addHtml('</b>');
1278 $txt->addText(' ' . strtolower($this->getStyleAdded()));
1279 } else {
1280 $txt->addHtml('<b>');
1281 $txt->addText($this->getDescription());
1282 $txt->addHtml('</b>');
1283 $txt->addText(' ' . strtolower($this->getAdded()));
1284 }
1285 $this->addAttributes($txt, $this->node->attributes);
1286 $txt->addText('.');
1287 }
1288
1289 protected function getMovedTo() {
1290 return $this->getString('diff-movedto');
1291 }
1292
1293 protected function getStyleAdded() {
1294 return $this->getString('diff-styleadded');
1295 }
1296
1297 protected function getAdded() {
1298 return $this->getString('diff-added');
1299 }
1300
1301 protected function getMovedOutOf() {
1302 return $this->getString('diff-movedoutof');
1303 }
1304
1305 protected function getStyleRemoved() {
1306 return $this->getString('diff-styleremoved');
1307 }
1308
1309 protected function getRemoved() {
1310 return $this->getString('diff-removed');
1311 }
1312
1313 protected function addAttributes(ChangeText $txt, array $attributes) {
1314 if (count($attributes) < 1) {
1315 return;
1316 }
1317
1318 $firstOne = true;
1319 $lastKey = null;
1320 foreach ($attributes as $key => &$attr) {
1321 $lastKey = $key;
1322 if($firstOne) {
1323 $firstOne = false;
1324 $txt->addText(' ' . strtolower($this->getWith())
1325 . ' ' . $this->translateArgument($key) . ' '
1326 . $attr);
1327 continue;
1328 }
1329 $txt->addText(', ' . $this->translateArgument($key) . ' '
1330 . $attr);
1331 }
1332
1333 if (count($attributes) > 1) {
1334 $txt->addText(' '
1335 . strtolower($this->getAnd())
1336 . ' '
1337 . $this->translateArgument($lastKey) . ' '
1338 . $attributes[$lastKey]);
1339 }
1340 }
1341
1342 private function getAnd() {
1343 return $this->getString('diff-and');
1344 }
1345
1346 private function getWith() {
1347 return $this->getString('diff-with');
1348 }
1349
1350 protected function translateArgument($name) {
1351 if (strcasecmp($name, 'src') == 0) {
1352 return strtolower($this->getSource());
1353 }
1354 if (strcasecmp($name, 'width') == 0) {
1355 return strtolower($this->getWidth());
1356 }
1357 if (strcasecmp($name, 'height') == 0) {
1358 return strtolower($this->getHeight());
1359 }
1360 return $name;
1361 }
1362
1363 private function getHeight() {
1364 return $this->getString('diff-height');
1365 }
1366
1367 private function getWidth() {
1368 return $this->getString('diff-width');
1369 }
1370
1371 protected function getSource() {
1372 return $this->getString('diff-source');
1373 }
1374
1375 protected function getArticle() {
1376 return $this->getString('diff-' . $this->node->qName . '-article');
1377 }
1378
1379 // FIXME: Use messages for this
1380 public static $bundle = array(
1381 'diff-movedto' => 'Moved to',
1382 'diff-styleadded' => 'Style added',
1383 'diff-added' => 'Added',
1384 'diff-changedto' => 'Changed to',
1385 'diff-movedoutof' => 'Moved out of',
1386 'diff-styleremoved' => 'Style removed',
1387 'diff-removed' => 'Removed',
1388 'diff-changedfrom' => 'Changed from',
1389 'diff-source' => 'Source',
1390 'diff-withdestination' => 'With destination',
1391 'diff-and' => 'And',
1392 'diff-with' => 'With',
1393 'diff-width' => 'Width',
1394 'diff-height' => 'Height',
1395 'diff-html-article' => 'A',
1396 'diff-html' => 'Html page',
1397 'diff-body-article' => 'A',
1398 'diff-body' => 'Html document',
1399 'diff-p-article' => 'A',
1400 'diff-p' => 'Paragraph',
1401 'diff-blockquote-article' => 'A',
1402 'diff-blockquote' => 'Quote',
1403 'diff-h1-article' => 'A',
1404 'diff-h1' => 'Heading (level 1)',
1405 'diff-h2-article' => 'A',
1406 'diff-h2' => 'Heading (level 2)',
1407 'diff-h3-article' => 'A',
1408 'diff-h3' => 'Heading (level 3)',
1409 'diff-h4-article' => 'A',
1410 'diff-h4' => 'Heading (level 4)',
1411 'diff-h5-article' => 'A',
1412 'diff-h5' => 'Heading (level 5)',
1413 'diff-pre-article' => 'A',
1414 'diff-pre' => 'Preformatted block',
1415 'diff-div-article' => 'A',
1416 'diff-div' => 'Division',
1417 'diff-ul-article' => 'An',
1418 'diff-ul' => 'Unordered list',
1419 'diff-ol-article' => 'An',
1420 'diff-ol' => 'Ordered list',
1421 'diff-li-article' => 'A',
1422 'diff-li' => 'List item',
1423 'diff-table-article' => 'A',
1424 'diff-table' => 'Table',
1425 'diff-tbody-article' => 'A',
1426 'diff-tbody' => "Table's content",
1427 'diff-tr-article' => 'A',
1428 'diff-tr' => 'Row',
1429 'diff-td-article' => 'A',
1430 'diff-td' => 'Cell',
1431 'diff-th-article' => 'A',
1432 'diff-th' => 'Header',
1433 'diff-br-article' => 'A',
1434 'diff-br' => 'Break',
1435 'diff-hr-article' => 'A',
1436 'diff-hr' => 'Horizontal rule',
1437 'diff-code-article' => 'A',
1438 'diff-code' => 'Computer code block',
1439 'diff-dl-article' => 'A',
1440 'diff-dl' => 'Definition list',
1441 'diff-dt-article' => 'A',
1442 'diff-dt' => 'Definition term',
1443 'diff-dd-article' => 'A',
1444 'diff-dd' => 'Definition',
1445 'diff-input-article' => 'An',
1446 'diff-input' => 'Input',
1447 'diff-form-article' => 'A',
1448 'diff-form' => 'Form',
1449 'diff-img-article' => 'An',
1450 'diff-img' => 'Image',
1451 'diff-span-article' => 'A',
1452 'diff-span' => 'Span',
1453 'diff-a-article' => 'A',
1454 'diff-a' => 'Link',
1455 'diff-i' => 'Italics',
1456 'diff-b' => 'Bold',
1457 'diff-strong' => 'Strong',
1458 'diff-em' => 'Emphasis',
1459 'diff-font' => 'Font',
1460 'diff-big' => 'Big',
1461 'diff-del' => 'Deleted',
1462 'diff-tt' => 'Fixed width',
1463 'diff-sub' => 'Subscript',
1464 'diff-sup' => 'Superscript',
1465 'diff-strike' => 'Strikethrough'
1466 );
1467
1468 public function getString($key) {
1469 return self::$bundle[$key];
1470 }
1471 }
1472
1473 class NoContentTagToString extends TagToString {
1474
1475 function __construct(TagNode $node, $sem) {
1476 parent::__construct($node, $sem);
1477 }
1478
1479 public function getAddedDescription(ChangeText $txt) {
1480 $txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' ');
1481 $txt.addHtml('<b>');
1482 $txt.addText(strtolower($this->getDescription()));
1483 $txt.addHtml('</b>');
1484
1485 $this->addAttributes($txt, $this->node->attributes);
1486 $txt.addText('.');
1487 }
1488
1489 private function getChangedTo() {
1490 return $this->getString('diff-changedto');
1491 }
1492
1493 public function getRemovedDescription(ChangeText $txt) {
1494 $txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' ');
1495 $txt.addHtml('<b>');
1496 $txt.addText(strtolower($this->getDescription()));
1497 $txt.addHtml('</b>');
1498
1499 $this->addAttributes($txt, $this->node->attributes);
1500 $txt.addText('.');
1501 }
1502
1503 private function getChangedFrom() {
1504 return $this->getString('diff-changedfrom');
1505 }
1506 }
1507
1508 class AnchorToString extends TagToString {
1509
1510 function __construct(TagNode $node, $sem) {
1511 parent::__construct($node, $sem);
1512 }
1513
1514 protected function addAttributes(ChangeText $txt, array $attributes) {
1515 if (array_key_exists('href', $attributes)) {
1516 $txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']);
1517 unset($attributes['href']);
1518 }
1519 parent::addAttributes($txt, $attributes);
1520 }
1521
1522 private function getWithDestination() {
1523 return $this->getString('diff-withdestination');
1524 }
1525
1526 }
1527
1528 /**
1529 * Takes a branch root and creates an HTML file for it.
1530 */
1531 class HTMLOutput{
1532
1533 private $prefix;
1534 private $handler;
1535
1536 function __construct($prefix, $handler) {
1537 $this->prefix = $prefix;
1538 $this->handler = $handler;
1539 }
1540
1541 public function parse(TagNode $node) {
1542 $handler = &$this->handler;
1543
1544 if (strcasecmp($node->qName, 'img') != 0 && strcasecmp($node->qName, 'body') != 0) {
1545 $handler->startElement($node->qName, $node->attributes);
1546 }
1547
1548 $newStarted = false;
1549 $remStarted = false;
1550 $changeStarted = false;
1551 $changeTXT = '';
1552
1553 foreach ($node->children as &$child) {
1554 if ($child instanceof TagNode) {
1555 if ($newStarted) {
1556 $handler->endElement('span');
1557 $newStarted = false;
1558 } else if ($changeStarted) {
1559 $handler->endElement('span');
1560 $changeStarted = false;
1561 } else if ($remStarted) {
1562 $handler->endElement('span');
1563 $remStarted = false;
1564 }
1565 $this->parse($child);
1566 } else if ($child instanceof TextNode) {
1567 $mod = $child->modification;
1568
1569 if ($newStarted && ($mod->type != Modification::ADDED || $mod->firstOfID)) {
1570 $handler->endElement('span');
1571 $newStarted = false;
1572 } else if ($changeStarted && ($mod->type != Modification::CHANGED
1573 || $mod->changes != $changeTXT || $mod->firstOfID)) {
1574 $handler->endElement('span');
1575 $changeStarted = false;
1576 } else if ($remStarted && ($mod->type != Modification::REMOVED || $mod ->firstOfID)) {
1577 $handler->endElement('span');
1578 $remStarted = false;
1579 }
1580
1581 // no else because a removed part can just be closed and a new
1582 // part can start
1583 if (!$newStarted && $mod->type == Modification::ADDED) {
1584 $attrs = array('class' => 'diff-html-added');
1585 if ($mod->firstOfID) {
1586 $attrs['id'] = "added-{$this->prefix}-{$mod->id}";
1587 }
1588 $this->addAttributes($mod, $attrs);
1589 $attrs['onclick'] = 'return tipA(constructToolTipA(this));';
1590 $handler->startElement('span', $attrs);
1591 $newStarted = true;
1592 } else if (!$changeStarted && $mod->type == Modification::CHANGED) {
1593 $attrs = array('class' => 'diff-html-changed');
1594 if ($mod->firstOfID) {
1595 $attrs['id'] = "changed-{$this->prefix}-{$mod->id}";
1596 }
1597 $this->addAttributes($mod, $attrs);
1598 $attrs['onclick'] = 'return tipC(constructToolTipC(this));';
1599 $handler->startElement('span', $attrs);
1600
1601 //tooltip
1602 $handler->startElement('span', array('class' => 'tip'));
1603 $handler->characters($mod->changes);
1604 $handler->endElement('span');
1605
1606 $changeStarted = true;
1607 $changeTXT = $mod->changes;
1608 } else if (!$remStarted && $mod->type == Modification::REMOVED) {
1609 $attrs = array('class'=>'diff-html-removed');
1610 if ($mod->firstOfID) {
1611 $attrs['id'] = "removed-{$this->prefix}-{$mod->id}";
1612 }
1613 $this->addAttributes($mod, $attrs);
1614 $attrs['onclick'] = 'return tipR(constructToolTipR(this));';
1615 $handler->startElement('span', $attrs);
1616 $remStarted = true;
1617 }
1618
1619 $chars = $child->text;
1620
1621 if ($child instanceof ImageNode) {
1622 $this->writeImage($child);
1623 } else {
1624 $handler->characters($chars);
1625 }
1626
1627 }
1628 }
1629
1630 if ($newStarted) {
1631 $handler->endElement('span');
1632 $newStarted = false;
1633 } else if ($changeStarted) {
1634 $handler->endElement('span');
1635 $changeStarted = false;
1636 } else if ($remStarted) {
1637 $handler->endElement('span');
1638 $remStarted = false;
1639 }
1640
1641 if (strcasecmp($node->qName, 'img') != 0
1642 && strcasecmp($node->qName, 'body') != 0) {
1643 $handler->endElement($node->qName);
1644 }
1645 }
1646
1647 private function writeImage(ImageNode $imgNode) {
1648 $attrs = $imgNode->attributes;
1649 if ($imgNode->modification->type == Modification::REMOVED) {
1650 $attrs['changeType']='diff-removed-image';
1651 } else if ($imgNode->modification->type == Modification::ADDED) {
1652 $attrs['changeType'] = 'diff-added-image';
1653 }
1654 $attrs['onload'] = 'updateOverlays()';
1655 $attrs['onError'] = 'updateOverlays()';
1656 $attrs['onAbort'] = 'updateOverlays()';
1657 $this->handler->startElement('img', $attrs);
1658 $this->handler->endElement('img');
1659 }
1660
1661 private function addAttributes(Modification $mod, /*array*/ &$attrs) {
1662 if (is_null($mod->prevMod)) {
1663 $previous = 'first-' . $this->prefix;
1664 } else {
1665 $type = Modification::typeToString($mod->prevMod->type);
1666 $previous = "$type-{$this->prefix}-{$mod->prevMod->id}";
1667 }
1668 $attrs['previous'] = $previous;
1669
1670 $type = Modification::typeToString($mod->type);
1671 $changeId = "$type-{$this->prefix}-{$mod->id}";
1672 $attrs['changeId'] = $changeId;
1673
1674 if (is_null($mod->nextMod )) {
1675 $next = "last-{$this->prefix}";
1676 } else {
1677 $type = Modification::typeToString($mod->nextMod->type);
1678 $next = "$type-{$this->prefix}-{$mod->nextMod->id}";
1679 }
1680 $attrs['next'] = $next;
1681 }
1682 }
1683
1684 class EchoingContentHandler {
1685
1686 function startElement($qname, /*array*/ $arguments) {
1687 echo Xml::openElement($qname, $arguments);
1688 }
1689
1690 function endElement($qname){
1691 echo Xml::closeElement($qname);
1692 }
1693
1694 function characters($chars){
1695 echo $chars;
1696 }
1697
1698 }
1699
1700 class DelegatingContentHandler {
1701
1702 private $delegate;
1703
1704 function __construct($delegate) {
1705 $this->delegate = $delegate;
1706 }
1707
1708 function startElement($qname, /*array*/ $arguments) {
1709 $this->delegate->addHtml(Xml::openElement($qname, $arguments));
1710 }
1711
1712 function endElement($qname){
1713 $this->delegate->addHtml(Xml::closeElement($qname));
1714 }
1715
1716 function characters($chars){
1717 $this->delegate->addHtml($chars);
1718 }
1719 }