New implementation of wikidiff3 algorithm replacing the previous one and basic HTML...
[lhc/web/wiklou.git] / includes / HTMLDiff.php
1 <?php
2 /* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * or see http://www.gnu.org/
18 */
19
20
21 /**
22 * Any element in the DOM tree of an HTML document.
23 */
24 abstract class Node{
25
26 protected $parent;
27
28
29 function __construct($parent){
30 $this->parent = $parent;
31 if(!is_null($parent)){
32 $parent->addChild($this);
33 }
34 }
35
36 public function getParent(){
37 return $this->parent;
38 }
39
40 public function getParentTree(){
41 if(!is_null($this->parent)){
42 $parentTree = $this->parent->getParentTree();
43 $parentTree[] = $this->parent;
44 return $parentTree;
45 }else{
46 return array();
47 }
48 }
49
50 public abstract function getMinimalDeletedSet($id);
51
52 public function detectIgnorableWhiteSpace(){
53 // no op
54 }
55
56 public function getLastCommonParent(Node $other){
57 if(is_null($other))
58 throw new Exception('The given node is NULL');
59
60 $result = new LastCommonParentResult();
61
62 $myParents = $this->getParentTree();
63 $otherParents = $other->getParentTree();
64
65 $i = 1;
66 $isSame = true;
67 while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) {
68 if (!$myParents[$i]->isSameTag($otherParents[$i])) {
69 $isSame = false;
70 } else {
71 // After the while, the index i-1 must be the last common parent
72 $i++;
73 }
74 }
75
76 $result->setLastCommonParentDepth($i - 1);
77 $result->setLastCommonParent($myParents[$i - 1]);
78
79 if (!$isSame) {
80 $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
81 $result->setSplittingNeeded();
82 } else if (sizeof($myParents) < sizeof($otherParents)) {
83 $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
84 } else if (sizeof($myParents) > sizeof($otherParents)) {
85 // All tags matched but there are tags left in this tree
86 $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
87 $result->setSplittingNeeded();
88 } else {
89 // All tags matched untill the very last one in both trees
90 // or there were no tags besides the BODY
91 $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
92 }
93 return $result;
94 }
95
96 public function setParent($parent) {
97 $this->parent = $parent;
98 }
99
100 public abstract function copyTree();
101
102 public function inPre() {
103 $tree = $this->getParentTree();
104 foreach ($tree as $ancestor) {
105 if ($ancestor->isPre()) {
106 return true;
107 }
108 }
109 return false;
110 }
111
112 private $whiteBefore = false;
113
114 private $whiteAfter = false;
115
116 public function isWhiteBefore() {
117 return $this->whiteBefore;
118 }
119
120 public function setWhiteBefore($whiteBefore) {
121 $this->whiteBefore = $whiteBefore;
122 }
123
124 public function isWhiteAfter() {
125 return $this->whiteAfter;
126 }
127
128 public function setWhiteAfter($whiteAfter) {
129 $this->whiteAfter = $whiteAfter;
130 }
131
132 public abstract function getLeftMostChild();
133
134 public abstract function getRightMostChild();
135 }
136
137 /**
138 * Node that can contain other nodes. Represents an HTML tag.
139 */
140 class TagNode extends Node{
141
142 public $children = array();
143
144 protected $qName;
145
146 protected $attributes = array();
147
148 function __construct($parent, $qName, /*array*/ $attributes) {
149 parent::__construct($parent);
150 $this->qName = strtolower($qName);
151 foreach($attributes as $key => $value){
152 $this->attributes[strtolower($key)] = $value;
153 }
154 }
155
156 public function addChild(Node $node, $index=-1) {
157 if ($node->getParent() !== $this)
158 throw new Exception(
159 'The new child must have this node as a parent.');
160 if($index == -1){
161 $this->children[] = $node;
162 }else{
163 array_splice($this->children,$index,0,array($node));
164 }
165 }
166
167 public function getIndexOf(Node $child) {
168 // don't trust array_search with objects
169 foreach($this->children as $key=>$value){
170 if($value === $child){
171 return $key;
172 }
173 }
174 return NULL;
175 }
176
177 public function getChild($i) {
178 return $this->children[$i];
179 }
180
181 public function getNbChildren() {
182 return count($this->children);
183 }
184
185 public function getQName() {
186 return $this->qName;
187 }
188
189 public function getAttributes() {
190 return $this->attributes;
191 }
192
193 public function isSameTag(TagNode $other) {
194 if (is_null($other))
195 return false;
196 return $this->getOpeningTag() === $other->getOpeningTag();
197 }
198
199 public function getOpeningTag() {
200 $s = '<'.$this->getQName();
201 foreach ($this->attributes as $attribute => $value) {
202 $s .= ' ' . $attribute . '="' . $value . '"';
203 }
204 return $s .= '>';
205 }
206
207 public function getEndTag() {
208 return '</' . $this->getQName() + '>"';
209 }
210
211 public function getMinimalDeletedSet($id) {
212 $nodes = array();
213
214 if ($this->getNbChildren() == 0)
215 return $nodes;
216
217 $hasNotDeletedDescendant = false;
218
219 foreach ($this->children as $child) {
220 $childrenChildren = $child->getMinimalDeletedSet($id);
221 $nodes = array_merge($nodes, $childrenChildren);
222 if (!$hasNotDeletedDescendant
223 && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) {
224 // This child is not entirely deleted
225 $hasNotDeletedDescendant = true;
226 }
227 }
228 if (!$hasNotDeletedDescendant) {
229 $nodes = array($this);
230 }
231 return $nodes;
232 }
233
234 public function toString() {
235 return $this->getOpeningTag();
236 }
237
238 public function splitUntill(TagNode $parent, Node $split, $includeLeft) {
239 $splitOccured = false;
240 if ($parent !== $this) {
241 $part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
242 $part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
243 $part1->setParent($this->getParent());
244 $part2->setParent($this->getParent());
245
246 $i = 0;
247 $nbChildren = $this->getNbChildren();
248 while ($i < $nbChildren && $this->children[$i] !== $split) {
249 $this->children[$i]->setParent($part1);
250 $part1->addChild($this->children[$i]);
251 ++$i;
252 }
253 if ($i < $nbChildren) {
254 if ($includeLeft) {
255 $this->children[$i]->setParent($part1);
256 $part1->addChild($this->children[$i]);
257 } else {
258 $this->children[$i]->setParent($part2);
259 $part2->addChild($this->children[$i]);
260 }
261 ++$i;
262 }
263 while ($i < $nbChildren) {
264 $this->children[$i]->setParent($part2);
265 $part2->addChild($this->children[$i]);
266 ++$i;
267 }
268 $myindexinparent = $this->parent->getIndexOf($this);
269 if ($part1->getNbChildren() > 0)
270 $this->parent->addChild($part1,$myindexinparent);
271
272 if ($part2->getNbChildren() > 0)
273 $this->parent->addChild($part2,$myindexinparent);
274
275 if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) {
276 $splitOccured = true;
277 }
278
279 $this->parent->removeChild($myindexinparent);
280
281 if ($includeLeft)
282 $this->parent->splitUntill($parent, $part1, $includeLeft);
283 else
284 $this->parent->splitUntill($parent, $part2, $includeLeft);
285 }
286 return $splitOccured;
287
288 }
289
290 private function removeChild($index) {
291 unset($this->children[$index]);
292 $this->children = array_values($this->children);
293 }
294
295 public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE,
296 'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE,
297 'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE);
298
299 public static function isBlockLevelName($qName) {
300 return array_key_exists(strtolower($qName),self::$blocks);
301 }
302
303 public static function isBlockLevelNode(Node $node) {
304 if(! $node instanceof TagNode)
305 return false;
306 return self::isBlockLevelName($node->getQName());
307 }
308
309 public function isBlockLevel() {
310 return  self::isBlockLevelNode($this);
311 }
312
313 public static function isInlineName($qName) {
314 return !self::isBlockLevelName($qName);
315 }
316
317 public static function isInlineNode(Node $node) {
318 return !self::isBlockLevelNode($node);
319 }
320
321 public function isInline() {
322 return self::isInlineNode($this);
323 }
324
325 public function copyTree() {
326 $newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes());
327 $newThis->setWhiteBefore($this->isWhiteBefore());
328 $newThis->setWhiteAfter($this->isWhiteAfter());
329 foreach($this->children as $child) {
330 $newChild = $child->copyTree();
331 $newChild->setParent($newThis);
332 $newThis->addChild($newChild);
333 }
334 return $newThis;
335 }
336
337 public function getMatchRatio(TagNode $other) {
338 $txtComp = new TextOnlyComparator($other);
339 return $txtComp->getMatchRatio(new TextOnlyComparator($this));
340 }
341
342 public function expandWhiteSpace() {
343 $shift = 0;
344 $spaceAdded = false;
345
346 $nbOriginalChildren = $this->getNbChildren();
347 for ($i = 0; $i < $nbOriginalChildren; ++$i) {
348 $child = $this->getChild($i + $shift);
349
350 if($child instanceof TagNode){
351 if (!$child->isPre()) {
352 $child->expandWhiteSpace();
353 }
354 }
355 if (!$spaceAdded && $child->isWhiteBefore()) {
356 $ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild());
357 $ws->setParent($this);
358 $this->addChild($ws,$i + ($shift++));
359 }
360 if ($child->isWhiteAfter()) {
361 $ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild());
362 $ws->setParent($this);
363 $this->addChild($ws,$i + 1 + ($shift++));
364 $spaceAdded = true;
365 } else {
366 $spaceAdded = false;
367 }
368
369 }
370 }
371
372 public function getLeftMostChild() {
373 if ($this->getNbChildren() < 1)
374 return $this;
375 return $this->getChild(0)->getLeftMostChild();
376
377 }
378
379 public function getRightMostChild() {
380 if ($this->getNbChildren() < 1)
381 return $this;
382 return $this->getChild($this->getNbChildren() - 1)->getRightMostChild();
383 }
384
385 public function isPre() {
386 return 0 == strcasecmp($this->getQName(),'pre');
387 }
388
389 public static function toDiffLine(TagNode $node){
390 return $node->getOpeningTag();
391 }
392 }
393
394 /**
395 * Represents a piece of text in the HTML file.
396 */
397 class TextNode extends Node{
398
399 private $s;
400
401 private $modification;
402
403 function __construct($parent, $s) {
404 parent::__construct($parent);
405 $this->modification = new Modification(Modification::NONE);
406 $this->s = $s;
407 }
408
409 public function copyTree() {
410 $clone = clone $this;
411 $clone->setParent(NULL);
412 return $clone;
413 }
414
415 public function getLeftMostChild() {
416 return $this;
417 }
418
419 public function getRightMostChild() {
420 return $this;
421 }
422
423 public function getMinimalDeletedSet($id) {
424 if ($this->getModification()->getType() == Modification::REMOVED
425 && $this->getModification()->getID() == $id){
426 return array($this);
427 }
428 return array();
429 }
430
431 public function getModification() {
432 return $this->modification;
433 }
434
435 public function getText() {
436 return $this->s;
437 }
438
439 public function isSameText($other) {
440 if (is_null($other) || ! $other instanceof TextNode){
441 return false;
442 }
443 return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText());
444 }
445
446 public function setModification(Modification $m) {
447 //wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType()));
448 $this->modification = $m;
449 }
450
451 public function toString() {
452 return $this->getText();
453 }
454
455 public static function toDiffLine(TextNode $node){
456 return str_replace('\n', ' ',$node->getText());
457 }
458 }
459
460 class WhiteSpaceNode extends TextNode {
461
462 function __construct($parent, $s, Node $like = NULL) {
463 parent::__construct($parent, $s);
464 if(!is_null($like) && $like instanceof TextNode){
465 $newModification = clone $like->getModification();
466 $newModification->setFirstOfID(false);
467 $this->setModification($newModification);
468 }
469 }
470
471 public static function isWhiteSpace($c) {
472 switch ($c) {
473 case ' ':
474 case '\t':
475 case '\r':
476 case '\n':
477 return true;
478 default:
479 return false;
480 }
481 }
482 }
483
484 /**
485 * Represents the root of a HTML document.
486 */
487 class BodyNode extends TagNode {
488
489 function __construct() {
490 parent::__construct(NULL, 'body', array());
491 }
492
493 public function copyTree() {
494 $newThis = new BodyNode();
495 foreach ($this->children as $child) {
496 $newChild = $child->copyTree();
497 $newChild->setParent($newThis);
498 $newThis->addChild($newChild);
499 }
500 return $newThis;
501 }
502
503 public function getMinimalDeletedSet($id) {
504 $nodes = array();
505 foreach ($this->children as $child) {
506 $childrenChildren = $child->getMinimalDeletedSet($id);
507 $nodes = array_merge($nodes, $childrenChildren);
508 }
509 return $nodes;
510 }
511
512 }
513
514 /**
515 * Represents an image in HTML. Even though images do not contain any text they
516 * are independent visible objects on the page. They are logically a TextNode.
517 */
518 class ImageNode extends TextNode {
519
520 private $attributes;
521
522 function __construct(TagNode $parent, /*array*/ $attrs) {
523 if(!array_key_exists('src',$attrs)){
524 //wfDebug('Image without a source:');
525 foreach($attrs as $key => $value){
526 //wfDebug("$key = $value");
527 }
528 parent::__construct($parent, '<img></img>');
529 }else{
530 parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
531 }
532 $this->attributes = $attrs;
533 }
534
535 public function isSameText($other) {
536 if (is_null($other) || ! $other instanceof ImageNode)
537 return false;
538 return $this->getText() === $other->getText();
539 }
540
541 public function getAttributes() {
542 return $this->attributes;
543 }
544
545 }
546
547 /**
548 * When detecting the last common parent of two nodes, all results are stored as
549 * a LastCommonParentResult.
550 */
551 class LastCommonParentResult {
552
553 // Parent
554 private $parent;
555
556 public function getLastCommonParent() {
557 return $this->parent;
558 }
559
560 public function setLastCommonParent(TagNode $parent) {
561 $this->parent = $parent;
562 }
563
564 // Splitting
565 private $splittingNeeded = false;
566
567 public function isSplittingNeeded() {
568 return $this->splittingNeeded;
569 }
570
571 public function setSplittingNeeded() {
572 $this->splittingNeeded = true;
573 }
574
575 // Depth
576 private $lastCommonParentDepth = -1;
577
578 public function getLastCommonParentDepth() {
579 return $this->lastCommonParentDepth;
580 }
581
582 public function setLastCommonParentDepth($depth) {
583 $this->lastCommonParentDepth = $depth;
584 }
585
586 // Index
587 private $indexInLastCommonParent = -1;
588
589 public function getIndexInLastCommonParent() {
590 return $this->indexInLastCommonParent;
591 }
592
593 public function setIndexInLastCommonParent($index) {
594 $this->indexInLastCommonParent = $index;
595 }
596 }
597
598 class Modification{
599
600 const NONE = 1;
601 const REMOVED = 2;
602 const ADDED = 4;
603 const CHANGED = 8;
604
605 private $type;
606
607 private $id = -1;
608
609 private $prevMod;
610
611 private $nextMod;
612
613 private $firstOfID = false;
614
615 function __construct($type) {
616 $this->type = $type;
617 }
618
619 public function copy() {
620 $newM = new Modification($this->getType());
621 $newM->setID($this->getID());
622 $newM->setChanges($this->getChanges());
623 $newM->setFirstOfID($this->isFirstOfID());
624 $newM->setNext($this->getNext());
625 $newM->setPrevious($this->getPrevious());
626 return $newM;
627 }
628
629 public function getType() {
630 return $this->type;
631 }
632
633 public function setID($id) {
634 $this->id = $id;
635 }
636
637 public function getID() {
638 return $this->id;
639 }
640
641 public function setPrevious($m) {
642 $this->prevMod = $m;
643 }
644
645 public function getPrevious() {
646 return $this->prevMod;
647 }
648
649 public function setNext($m) {
650 $this->nextMod = $m;
651 }
652
653 public function getNext() {
654 return $this->nextMod;
655 }
656
657 private $changes;
658
659 public function setChanges($changes) {
660 $this->changes = $changes;
661 }
662
663 public function getChanges() {
664 return $this->changes;
665 }
666
667 public function isFirstOfID() {
668 return $this->firstOfID;
669 }
670
671 public function setFirstOfID($firstOfID) {
672 $this->firstOfID = $firstOfID;
673 }
674
675 public static function typeToString($type){
676 switch($type){
677 case self::NONE: return 'none';
678 case self::REMOVED: return 'removed';
679 case self::ADDED: return 'added';
680 case self::CHANGED: return 'changed';
681 }
682 }
683 }
684
685 class DomTreeBuilder {
686
687 private $textNodes = array();
688
689 private $bodyNode;
690
691 private $currentParent;
692
693 private $newWord = "";
694
695 protected $bodyStarted = false;
696
697 protected $bodyEnded = false;
698
699 private $whiteSpaceBeforeThis = false;
700
701 private $lastSibling;
702
703 function __construct(){
704 $this->bodyNode = $this->currentParent = new BodyNode();
705 }
706
707 public function getBodyNode() {
708 return $this->bodyNode;
709 }
710
711 public function getTextNodes() {
712 return $this->textNodes;
713 }
714
715 /**
716 * Must be called manually
717 */
718 public function endDocument() {
719 $this->endWord();
720 //wfDebug(sizeof($this->textNodes) . ' text nodes in document.');
721 }
722
723 public function startElement($parser, $name, /*array*/ $attributes) {
724 if(!strcasecmp($name, 'body')==0){
725 //wfDebug("Starting $name node.");
726 $this->endWord();
727
728 $newTagNode = new TagNode($this->currentParent, $name, $attributes);
729 $this->currentParent = $newTagNode;
730 $this->lastSibling = NULL;
731 if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) {
732 $newTagNode->setWhiteBefore(true);
733 }
734 $this->whiteSpaceBeforeThis = false;
735 }
736 }
737
738 public function endElement($parser, $name) {
739 if(!strcasecmp($name, 'body')==0){
740 //wfDebug("Ending $name node.");
741 if (0 == strcasecmp($name,'img')) {
742 // Insert a dummy leaf for the image
743 $img = new ImageNode($this->currentParent, $this->currentParent->getAttributes());
744 $img->setWhiteBefore($this->whiteSpaceBeforeThis);
745 $this->lastSibling = $img;
746 $this->textNodes[] = $img;
747 }
748 $this->endWord();
749 if ($this->currentParent->isInline()) {
750 $this->lastSibling = $this->currentParent;
751 } else {
752 $this->lastSibling = NULL;
753 }
754 $this->currentParent = $this->currentParent->getParent();
755 $this->whiteSpaceBeforeThis = false;
756 }else{
757 $this->endDocument();
758 }
759 }
760
761 public function characters($parser, $data){
762 //wfDebug('Parsing '. strlen($data).' characters.');
763 $array = str_split($data);
764 foreach($array as $c) {
765 if (self::isDelimiter($c)) {
766 $this->endWord();
767 if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre()
768 && !$this->currentParent->inPre()) {
769 if (!is_null($this->lastSibling)){
770 $this->lastSibling->setWhiteAfter(true);
771 }
772 $this->whiteSpaceBeforeThis = true;
773 } else {
774 $textNode = new TextNode($this->currentParent, $c);
775 $textNode->setWhiteBefore($this->whiteSpaceBeforeThis);
776 $this->whiteSpaceBeforeThis = false;
777 $this->lastSibling = $textNode;
778 $this->textNodes[] = $textNode;
779 }
780 } else {
781 $this->newWord .= $c;
782 }
783
784 }
785 }
786
787 private function endWord() {
788 if (strlen($this->newWord) > 0) {
789 $node = new TextNode($this->currentParent, $this->newWord);
790 $node->setWhiteBefore($this->whiteSpaceBeforeThis);
791 $this->whiteSpaceBeforeThis = false;
792 $this->lastSibling = $node;
793 $this->textNodes[] = $node;
794 $this->newWord = "";
795 }
796 }
797
798 public static function isDelimiter($c) {
799 switch ($c) {
800 // Basic Delimiters
801 case '/':
802 case '.':
803 case '!':
804 case ',':
805 case ';':
806 case '?':
807 case '=':
808 case '\'':
809 case '"':
810 // Extra Delimiters
811 case '[':
812 case ']':
813 case '{':
814 case '}':
815 case '(':
816 case ')':
817 case '&':
818 case '|':
819 case '\\':
820 case '-':
821 case '_':
822 case '+':
823 case '*':
824 case ':':
825 return true;
826 default:
827 return WhiteSpaceNode::isWhiteSpace($c);
828 }
829 }
830
831 public function getDiffLines(){
832 return array_map(array('TextNode','toDiffLine'), $this->textNodes);
833 }
834 }
835
836 class TextNodeDiffer{
837
838 private $textNodes;
839 private $bodyNode;
840
841 private $oldTextNodes;
842 private $oldBodyNode;
843
844 private $lastModified = array();
845
846 function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
847 $this->textNodes = $tree->getTextNodes();
848 $this->bodyNode = $tree->getBodyNode();
849 $this->oldTextNodes = $oldTree->getTextNodes();
850 $this->oldBodyNode = $oldTree->getBodyNode();
851 }
852
853 public function getBodyNode() {
854 return $this->bodyNode;
855 }
856
857 private $newID = 0;
858
859 public function markAsNew($start, $end) {
860 if ($end <= $start)
861 return;
862
863 if ($this->whiteAfterLastChangedPart)
864 $this->textNodes[$start]->setWhiteBefore(false);
865
866 $nextLastModified = array();
867
868 for ($i = $start; $i < $end; ++$i) {
869 $mod = new Modification(Modification::ADDED);
870 $mod->setID($this->newID);
871 if (sizeof($this->lastModified) > 0) {
872 $mod->setPrevious($this->lastModified[0]);
873 if (is_null($this->lastModified[0]->getNext())) {
874 foreach ($this->lastModified as $lastMod) {
875 $lastMod->setNext($mod);
876 }
877 }
878 }
879 $nextLastModified[] = $mod;
880 $this->textNodes[$i]->setModification($mod);
881 }
882 if ($start < $end) {
883 $this->textNodes[$start]->getModification()->setFirstOfID(true);
884 }
885 ++$this->newID;
886 $this->lastModified = $nextLastModified;
887 }
888
889 private $changedID = 0;
890
891 private $changedIDUsed = false;
892
893 public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
894 $i = $rightstart;
895 $j = $leftstart;
896
897 if ($this->changedIDUsed) {
898 ++$this->changedID;
899 $this->changedIDUsed = false;
900 }
901
902 $nextLastModified = array();
903
904 $changes;
905 while ($i < $rightend) {
906 $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
907 $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
908 $result = $acthis->getResult($acother);
909 unset($acthis, $acother);
910
911 $nbLastModified = sizeof($this->lastModified);
912 if ($result->isChanged()) {
913 $mod = new Modification(Modification::CHANGED);
914
915 if (!$this->changedIDUsed) {
916 $mod->setFirstOfID(true);
917 if (sizeof($nextLastModified) > 0) {
918 $this->lastModified = $nextLastModified;
919 $nextLastModified = array();
920 }
921 } else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) {
922 ++$this->changedID;
923 $mod->setFirstOfID(true);
924 if (sizeof($nextLastModified) > 0) {
925 $this->lastModified = $nextLastModified;
926 $nextLastModified = array();
927 }
928 }
929
930 if ($nbLastModified > 0) {
931 $mod->setPrevious($this->lastModified[0]);
932 if (is_null($this->lastModified[0]->getNext())) {
933 foreach ($this->lastModified as $lastMod) {
934 $lastMod->setNext($mod);
935 }
936 }
937 }
938 $nextLastModified[] = $mod;
939
940 $mod->setChanges($result->getChanges());
941 $mod->setID($this->changedID);
942
943 $this->textNodes[$i]->setModification($mod);
944 $this->changes = $result->getChanges();
945 $this->changedIDUsed = true;
946 } else if ($this->changedIDUsed) {
947 ++$this->changedID;
948 $this->changedIDUsed = false;
949 }
950 ++$i;
951 ++$j;
952 }
953 if (sizeof($nextLastModified) > 0){
954 $this->lastModified = $nextLastModified;
955 }
956 }
957
958 // used to remove the whitespace between a red and green block
959 private $whiteAfterLastChangedPart = false;
960
961 private $deletedID = 0;
962
963 public function markAsDeleted($start, $end, $before) {
964
965 if ($end <= $start)
966 return;
967
968 if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) {
969 $this->whiteAfterLastChangedPart = true;
970 } else {
971 $this->whiteAfterLastChangedPart = false;
972 }
973
974 $nextLastModified = array();
975
976 for ($i = $start; $i < $end; ++$i) {
977 $mod = new Modification(Modification::REMOVED);
978 $mod->setID($this->deletedID);
979 if (sizeof($this->lastModified) > 0) {
980 $mod->setPrevious($this->lastModified[0]);
981 if (is_null($this->lastModified[0]->getNext())) {
982 foreach ($this->lastModified as $lastMod) {
983 $lastMod->setNext($mod);
984 }
985 }
986 }
987 $nextLastModified[] = $mod;
988
989 // oldTextNodes is used here because we're going to move its deleted
990 // elements
991 // to this tree!
992 $this->oldTextNodes[$i]->setModification($mod);
993 }
994 $this->oldTextNodes[$start]->getModification()->setFirstOfID(true);
995
996 $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID);
997
998 //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));
999
1000 // Set prevLeaf to the leaf after which the old HTML needs to be
1001 // inserted
1002 if ($before > 0){
1003 $prevLeaf = $this->textNodes[$before - 1];
1004 }
1005 // Set nextLeaf to the leaf before which the old HTML needs to be
1006 // inserted
1007 if ($before < sizeof($this->textNodes)){
1008 $nextLeaf = $this->textNodes[$before];
1009 }
1010
1011 while (sizeof($deletedNodes) > 0) {
1012 if (isset($prevLeaf)) {
1013 $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
1014 } else {
1015 $prevResult = new LastCommonParentResult();
1016 $prevResult->setLastCommonParent($this->getBodyNode());
1017 $prevResult->setIndexInLastCommonParent(0);
1018 }
1019 if (isset($nextleaf)) {
1020 $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]);
1021 } else {
1022 $nextResult = new LastCommonParentResult();
1023 $nextResult->setLastCommonParent($this->getBodyNode());
1024 $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren());
1025 }
1026
1027 if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) {
1028 // We need some metric to choose which way to add-...
1029 if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent()
1030 && $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) {
1031 // The difference is not in the parent
1032 $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
1033 } else {
1034 // The difference is in the parent, so compare them
1035 // now THIS is tricky
1036 $distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent());
1037 $distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent());
1038
1039 if ($distancePrev <= $distanceNext) {
1040 $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
1041 } else {
1042 $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1);
1043 }
1044 }
1045
1046 }
1047
1048 if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) {
1049 // Inserting at the front
1050 if ($prevResult->isSplittingNeeded()) {
1051 $prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true);
1052 }
1053 $prevLeaf = $deletedNodes[0]->copyTree();
1054 unset($deletedNodes[0]);
1055 $deletedNodes = array_values($deletedNodes);
1056 $prevLeaf->setParent($prevResult->getLastCommonParent());
1057 $prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1);
1058 } else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) {
1059 // Inserting at the back
1060 if ($nextResult->isSplittingNeeded()) {
1061 $splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false);
1062 if ($splitOccured) {
1063 // The place where to insert is shifted one place to the
1064 // right
1065 $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1);
1066 }
1067 }
1068 $nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree();
1069 unset($deletedNodes[sizeof(deletedNodes) - 1]);
1070 $deletedNodes = array_values($deletedNodes);
1071 $nextLeaf->setParent($nextResult->getLastCommonParent());
1072 $nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent());
1073 } else
1074 throw new Exception("Uh?");
1075 }
1076 $this->lastModified = $nextLastModified;
1077 ++$this->deletedID;
1078 }
1079
1080 public function expandWhiteSpace() {
1081 $this->getBodyNode()->expandWhiteSpace();
1082 }
1083
1084 public function lengthNew(){
1085 return sizeof($this->textNodes);
1086 }
1087
1088 public function lengthOld(){
1089 return sizeof($this->oldTextNodes);
1090 }
1091 }
1092
1093 class HTMLDiffer{
1094
1095 private $output;
1096
1097 function __construct($output){
1098 $this->output = $output;
1099 }
1100
1101 function htmlDiff($from, $to){
1102 // Create an XML parser
1103 $xml_parser = xml_parser_create('');
1104
1105 $domfrom = new DomTreeBuilder();
1106
1107 // Set the functions to handle opening and closing tags
1108 xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement"));
1109
1110 // Set the function to handle blocks of character data
1111 xml_set_character_data_handler($xml_parser, array($domfrom,"characters"));
1112
1113 ;
1114 //wfDebug('Parsing '.strlen($from)." characters worth of HTML");
1115 if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
1116 || !xml_parse($xml_parser, $from, FALSE)
1117 || !xml_parse($xml_parser, '</body>', TRUE)){
1118 wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
1119 }
1120 xml_parser_free($xml_parser);
1121 unset($from);
1122
1123 $xml_parser = xml_parser_create('');
1124
1125 $domto = new DomTreeBuilder();
1126
1127 // Set the functions to handle opening and closing tags
1128 xml_set_element_handler($xml_parser, array($domto,"startElement"), array($domto,"endElement"));
1129
1130 // Set the function to handle blocks of character data
1131 xml_set_character_data_handler($xml_parser, array($domto,"characters"));
1132
1133 //wfDebug('Parsing '.strlen($to)." characters worth of HTML");
1134 if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
1135 || !xml_parse($xml_parser, $to, FALSE)
1136 || !xml_parse($xml_parser, '</body>', TRUE)){
1137 wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
1138 }
1139 xml_parser_free($xml_parser);
1140 unset($to);
1141
1142 $diffengine = new _DiffEngine();
1143 $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
1144 unset($xml_parser,$diffengine);
1145
1146 $domdiffer = new TextNodeDiffer($domto, $domfrom);
1147
1148 $currentIndexLeft = 0;
1149 $currentIndexRight = 0;
1150 foreach ($differences as $d) {
1151 if ($d->leftstart > $currentIndexLeft) {
1152 $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
1153 $currentIndexRight, $d->rightstart);
1154 }
1155 if ($d->leftlength > 0) {
1156 $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
1157 }
1158 $domdiffer->markAsNew($d->rightstart, $d->rightend);
1159
1160 $currentIndexLeft = $d->leftend;
1161 $currentIndexRight = $d->rightend;
1162 }
1163 if ($currentIndexLeft < $domdiffer->lengthOld()) {
1164 $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew());
1165 }
1166
1167 $domdiffer->expandWhiteSpace();
1168 $output = new HTMLOutput('htmldiff', $this->output);
1169 $output->parse($domdiffer->getBodyNode());
1170 }
1171
1172 private function preProcess(/*array*/ $differences){
1173 $newRanges = array();
1174
1175 $nbDifferences = sizeof($differences);
1176 for ($i = 0; $i < $nbDifferences; ++$i) {
1177 $leftStart = $differences[$i]->leftstart;
1178 $leftEnd = $differences[$i]->leftend;
1179 $rightStart = $differences[$i]->rightstart;
1180 $rightEnd = $differences[$i]->rightend;
1181
1182 $leftLength = $leftEnd - $leftStart;
1183 $rightLength = $rightEnd - $rightStart;
1184
1185 while ($i + 1 < $nbDifferences && self::score($leftLength, $differences[$i + 1]->leftlength,
1186 $rightLength, $differences[$i + 1]->rightlength) > ($differences[$i + 1]->leftstart - $leftEnd)) {
1187 $leftEnd = $differences[$i + 1]->leftend;
1188 $rightEnd = $differences[$i + 1]->rightend;
1189 $leftLength = $leftEnd - $leftStart;
1190 $rightLength = $rightEnd - $rightStart;
1191 ++$i;
1192 }
1193 $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
1194 }
1195 return $newRanges;
1196 }
1197
1198 /**
1199 * Heuristic to merge differences for readability.
1200 */
1201 public static function score($ll, $nll, $rl, $nrl) {
1202 if (($ll == 0 && $nll == 0)
1203 || ($rl == 0 && $nrl == 0)){
1204 return 0;
1205 }
1206 $numbers = array($ll, $nll, $rl, $nrl);
1207 $d = 0;
1208 foreach ($numbers as $number) {
1209 while ($number > 3) {
1210 $d += 3;
1211 $number -= 3;
1212 $number *= 0.5;
1213 }
1214 $d += $number;
1215
1216 }
1217 return $d / (1.5 * sizeof($numbers));
1218 }
1219
1220 }
1221
1222 class TextOnlyComparator{
1223
1224 public $leafs = array();
1225
1226 function _construct(TagNode $tree) {
1227 $this->addRecursive($tree);
1228 $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
1229 }
1230
1231 private function addRecursive(TagNode $tree) {
1232 foreach ($tree->children as $child) {
1233 if ($child instanceof TagNode) {
1234 $this->addRecursive($child);
1235 } else if ($child instanceof TextNode) {
1236 $this->leafs[] = $node;
1237 }
1238 }
1239 }
1240
1241 public function getMatchRatio(TextOnlyComparator $other) {
1242 $nbOthers = sizeof($other->leafs);
1243 $nbThis = sizeof($this->leafs);
1244 if($nbOthers == 0 || $nbThis == 0){
1245 return -log(0);
1246 }
1247
1248 $diffengine = new _DiffEngine();
1249 $diffengine->diff_local($this->leafs, $other->leafs);
1250
1251 $distanceThis = array_sum($diffengine->xchanged);
1252 $distanceOther = array_sum($diffengine->ychanged);
1253
1254 return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis)
1255 / $nbThis) / 2.0;
1256 }
1257 }
1258
1259 class AncestorComparatorResult {
1260
1261 private $changed = false;
1262
1263 private $changes = "";
1264
1265 public function isChanged() {
1266 return $this->changed;
1267 }
1268
1269 public function setChanged($changed) {
1270 $this->changed = $changed;
1271 }
1272
1273 public function getChanges() {
1274 return $this->changes;
1275 }
1276
1277 public function setChanges($changes) {
1278 $this->changes = $changes;
1279 }
1280 }
1281
1282 /**
1283 * A comparator used when calculating the difference in ancestry of two Nodes.
1284 */
1285 class AncestorComparator{
1286
1287 public $ancestors;
1288 public $ancestorsText;
1289
1290 function __construct(/*array*/ $ancestors) {
1291 $this->ancestors = $ancestors;
1292 $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
1293 }
1294
1295 private $compareTxt = "";
1296
1297 public function getCompareTxt() {
1298 return $this->compareTxt;
1299 }
1300
1301 public function getResult(AncestorComparator $other) {
1302 $result = new AncestorComparatorResult();
1303
1304 $diffengine = new _DiffEngine();
1305 $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText);
1306
1307 if (sizeof($differences) == 0){
1308 return $result;
1309 }
1310 $changeTxt = new ChangeTextGenerator($this, $other);
1311
1312 $result->setChanged(true);
1313 $result->setChanges($changeTxt->getChanged($differences)->toString());
1314
1315 return $result;
1316 }
1317 }
1318
1319 class ChangeTextGenerator {
1320
1321 private $new;
1322 private $old;
1323
1324 private $factory;
1325
1326 function __construct(AncestorComparator $old, AncestorComparator $new) {
1327 $this->new = $new;
1328 $this->old = $old;
1329 $this->factory = new TagToStringFactory();
1330 }
1331
1332 public function getChanged(/*array*/ $differences) {
1333 $txt = new ChangeText;
1334
1335 $rootlistopened = false;
1336
1337 if (sizeof($differences) > 1) {
1338 $txt->addHtml('<ul class="changelist">');
1339 $rootlistopened = true;
1340 }
1341
1342 $nbDifferences = sizeof($differences);
1343 for ($j = 0; $j < $nbDifferences; ++$j) {
1344 $d = $differences[$j];
1345
1346 $lvl1listopened = false;
1347
1348 if ($rootlistopened) {
1349 $txt->addHtml('<li>');
1350 }
1351
1352 if ($d->leftlength + $d->rightlength > 1) {
1353 $txt->addHtml('<ul class="changelist">');
1354 $lvl1listopened = true;
1355 }
1356
1357 // left are the old ones
1358 for ($i = $d->leftstart; $i < $d->leftend; ++$i) {
1359 if ($lvl1listopened){
1360 $txt->addHtml('<li>');
1361 }
1362 // add a bullet for a old tag
1363 $this->addTagOld($txt, $this->old->ancestors[$i]);
1364
1365 if ($lvl1listopened){
1366 $txt->addHtml('</li>');
1367 }
1368 }
1369
1370 // right are the new ones
1371 for ($i = $d->rightstart; $i < $d->rightend; ++$i) {
1372 if ($lvl1listopened){
1373 $txt->addHtml('<li>');
1374 }
1375
1376 // add a bullet for a new tag
1377 $this->addTagNew($txt, $this->new->ancestors[$i]);
1378
1379 if ($lvl1listopened){
1380 $txt->addHtml('</li>');
1381 }
1382
1383 }
1384
1385 if ($lvl1listopened) {
1386 $txt->addHtml('</ul>');
1387 }
1388
1389 if ($rootlistopened) {
1390 $txt->addHtml('</li>');
1391 }
1392 }
1393
1394 if ($rootlistopened) {
1395 $txt->addHtml('</ul>');
1396 }
1397
1398 return $txt;
1399
1400 }
1401
1402 private function addTagOld(ChangeText $txt, TagNode $ancestor) {
1403 $this->factory->create($ancestor)->getRemovedDescription($txt);
1404 }
1405
1406 private function addTagNew(ChangeText $txt, TagNode $ancestor) {
1407 $this->factory->create($ancestor)->getAddedDescription($txt);
1408 }
1409 }
1410
1411 class ChangeText {
1412
1413 private $txt = "";
1414
1415 const newLine = "<br/>";
1416
1417 public function addText($s) {
1418 $s = $this->clean($s);
1419 $this->txt .= $s;
1420 }
1421
1422 public function addHtml($s) {
1423 $this->txt .= $s;
1424 }
1425
1426 public function addNewLine() {
1427 $this->addHtml(self::newLine);
1428 }
1429
1430 public function toString() {
1431 return $this->txt;
1432 }
1433
1434 private function clean($s) {
1435 return htmlspecialchars($s);
1436 }
1437 }
1438
1439 class TagToStringFactory {
1440
1441 private static $containerTags = array(
1442 'html' => TRUE,
1443 'body' => TRUE,
1444 'p' => TRUE,
1445 'blockquote' => TRUE,
1446 'h1' => TRUE,
1447 'h2' => TRUE,
1448 'h3' => TRUE,
1449 'h4' => TRUE,
1450 'h5' => TRUE,
1451 'pre' => TRUE,
1452 'div' => TRUE,
1453 'ul' => TRUE,
1454 'ol' => TRUE,
1455 'li' => TRUE,
1456 'table' => TRUE,
1457 'tbody' => TRUE,
1458 'tr' => TRUE,
1459 'td' => TRUE,
1460 'th' => TRUE,
1461 'br' => TRUE,
1462 'hr' => TRUE,
1463 'code' => TRUE,
1464 'dl' => TRUE,
1465 'dt' => TRUE,
1466 'dd' => TRUE,
1467 'input' => TRUE,
1468 'form' => TRUE,
1469 'img' => TRUE,
1470 // in-line tags that can be considered containers not styles
1471 'span' => TRUE,
1472 'a' => TRUE
1473 );
1474
1475 private static $styleTags = array(
1476 'i' => TRUE,
1477 'b' => TRUE,
1478 'strong' => TRUE,
1479 'em' => TRUE,
1480 'font' => TRUE,
1481 'big' => TRUE,
1482 'del' => TRUE,
1483 'tt' => TRUE,
1484 'sub' => TRUE,
1485 'sup' => TRUE,
1486 'strike' => TRUE
1487 );
1488
1489 const MOVED = 1;
1490 const STYLE = 2;
1491 const UNKNOWN = 4;
1492
1493 public function create(TagNode $node) {
1494 $sem = $this->getChangeSemantic($node->getQName());
1495 if (0 == strcasecmp($node->getQName(),'a')){
1496 return new AnchorToString($node, $sem);
1497 }
1498 if (0 == strcasecmp($node->getQName(),'img')){
1499 return new NoContentTagToString($node, $sem);
1500 }
1501 return new TagToString($node, $sem);
1502 }
1503
1504 protected function getChangeSemantic($qname) {
1505 if (array_key_exists(strtolower($qname),self::$containerTags)){
1506 return self::MOVED;
1507 }
1508 if (array_key_exists(strtolower($qname),self::$styleTags)){
1509 return self::STYLE;
1510 }
1511 return self::UNKNOWN;
1512 }
1513 }
1514
1515 class TagToString {
1516
1517 protected $node;
1518
1519 protected $sem;
1520
1521 function __construct(TagNode $node, $sem) {
1522 $this->node = $node;
1523 $this->sem = $sem;
1524 }
1525
1526 public function getDescription() {
1527 return $this->getString('diff-' . $this->node->getQName());
1528 }
1529
1530 public function getRemovedDescription(ChangeText $txt) {
1531
1532 if ($this->sem == TagToStringFactory::MOVED) {
1533 $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' ');
1534 $txt->addHtml('<b>');
1535 $txt->addText(strtolower($this->getDescription()));
1536 $txt->addHtml('</b>');
1537 } else if ($this->sem == TagToStringFactory::STYLE) {
1538 $txt->addHtml('<b>');
1539 $txt->addText($this->getDescription());
1540 $txt->addHtml('</b>');
1541 $txt->addText(' ' . strtolower($this->getStyleRemoved()));
1542 } else {
1543 $txt->addHtml('<b>');
1544 $txt->addText($this->getDescription());
1545 $txt->addHtml('</b>');
1546 $txt->addText(' ' . strtolower($this->getRemoved()));
1547 }
1548 $this->addAttributes($txt, $this->node->getAttributes());
1549 $txt->addText('.');
1550 }
1551
1552 public function getAddedDescription(ChangeText $txt) {
1553
1554 if ($this->sem == TagToStringFactory::MOVED) {
1555 $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' ');
1556 $txt->addHtml('<b>');
1557 $txt->addText(strtolower($this->getDescription()));
1558 $txt->addHtml('</b>');
1559 } else if ($this->sem == TagToStringFactory::STYLE) {
1560 $txt->addHtml('<b>');
1561 $txt->addText($this->getDescription());
1562 $txt->addHtml('</b>');
1563 $txt->addText(' ' . strtolower($this->getStyleAdded()));
1564 } else {
1565 $txt->addHtml('<b>');
1566 $txt->addText($this->getDescription());
1567 $txt->addHtml('</b>');
1568 $txt->addText(' ' . strtolower($this->getAdded()));
1569 }
1570 $this->addAttributes($txt, $this->node->getAttributes());
1571 $txt->addText('.');
1572 }
1573
1574 protected function getMovedTo() {
1575 return $this->getString('diff-movedto');
1576 }
1577
1578 protected function getStyleAdded() {
1579 return $this->getString('diff-styleadded');
1580 }
1581
1582 protected function getAdded() {
1583 return $this->getString('diff-added');
1584 }
1585
1586 protected function getMovedOutOf() {
1587 return $this->getString('diff-movedoutof');
1588 }
1589
1590 protected function getStyleRemoved() {
1591 return $this->getString('diff-styleremoved');
1592 }
1593
1594 protected function getRemoved() {
1595 return $this->getString('diff-removed');
1596 }
1597
1598 protected function addAttributes(ChangeText $txt, array $attributes) {
1599 if (sizeof($attributes) < 1)
1600 return;
1601
1602 $keys = array_keys($attributes);
1603
1604 $txt->addText(' ' . strtolower($this->getWith()) . ' '
1605 . $this->translateArgument($keys[0]) . ' '
1606 . $attributes[$keys[0]]);
1607 for ($i = 1; $i < sizeof($attributes) - 1; $i++) {
1608 $txt->addText(', ' . $this->translateArgument($keys[$i]) . ' '
1609 . $attributes[$keys[$i]]);
1610 }
1611 if (sizeof($attributes) > 1) {
1612 $txt->addText(' '
1613 . strtolower($this->getAnd())
1614 . ' '
1615 . $this->translateArgument($keys[sizeof($attributes) - 1]) . ' '
1616 . $attributes[$keys[sizeof($attributes) - 1]]);
1617 }
1618 }
1619
1620 private function getAnd() {
1621 return $this->getString('diff-and');
1622 }
1623
1624 private function getWith() {
1625 return $this->getString('diff-with');
1626 }
1627
1628 protected function translateArgument($name) {
1629 if (0 == strcasecmp($name,'src'))
1630 return strtolower($this->getSource());
1631 if (0 == strcasecmp($name,'width'))
1632 return strtolower($this->getWidth());
1633 if (0 == strcasecmp($name,'height'))
1634 return strtolower($this->getHeight());
1635 return $name;
1636 }
1637
1638 private function getHeight() {
1639 return $this->getString('diff-height');
1640 }
1641
1642 private function getWidth() {
1643 return $this->getString('diff-width');
1644 }
1645
1646 protected function getSource() {
1647 return $this->getString('diff-source');
1648 }
1649
1650 protected function getArticle() {
1651 return $this->getString('diff-' . $this->node->getQName() . '-article');
1652 }
1653
1654 public static $bundle = array(
1655 'diff-movedto' => 'Moved to',
1656 'diff-styleadded' => 'Style added',
1657 'diff-added' => 'Added',
1658 'diff-changedto' => 'Changed to',
1659 'diff-movedoutof' => 'Moved out of',
1660 'diff-styleremoved' => 'Style removed',
1661 'diff-removed' => 'Removed',
1662 'diff-changedfrom' => 'Changed from',
1663 'diff-source' => 'Source',
1664 'diff-withdestination' => 'With destination',
1665 'diff-and' => 'And',
1666 'diff-with' => 'With',
1667 'diff-width' => 'Width',
1668 'diff-height' => 'Height',
1669 'diff-html-article' => 'A',
1670 'diff-html' => 'Html page',
1671 'diff-body-article' => 'A',
1672 'diff-body' => 'Html document',
1673 'diff-p-article' => 'A',
1674 'diff-p' => 'Paragraph',
1675 'diff-blockquote-article' => 'A',
1676 'diff-blockquote' => 'Quote',
1677 'diff-h1-article' => 'A',
1678 'diff-h1' => 'Heading (level 1)',
1679 'diff-h2-article' => 'A',
1680 'diff-h2' => 'Heading (level 2)',
1681 'diff-h3-article' => 'A',
1682 'diff-h3' => 'Heading (level 3)',
1683 'diff-h4-article' => 'A',
1684 'diff-h4' => 'Heading (level 4)',
1685 'diff-h5-article' => 'A',
1686 'diff-h5' => 'Heading (level 5)',
1687 'diff-pre-article' => 'A',
1688 'diff-pre' => 'Preformatted block',
1689 'diff-div-article' => 'A',
1690 'diff-div' => 'Division',
1691 'diff-ul-article' => 'An',
1692 'diff-ul' => 'Unordered list',
1693 'diff-ol-article' => 'An',
1694 'diff-ol' => 'Ordered list',
1695 'diff-li-article' => 'A',
1696 'diff-li' => 'List item',
1697 'diff-table-article' => 'A',
1698 'diff-table' => 'Table',
1699 'diff-tbody-article' => 'A',
1700 'diff-tbody' => "Table's content",
1701 'diff-tr-article' => 'A',
1702 'diff-tr' => 'Row',
1703 'diff-td-article' => 'A',
1704 'diff-td' => 'Cell',
1705 'diff-th-article' => 'A',
1706 'diff-th' => 'Header',
1707 'diff-br-article' => 'A',
1708 'diff-br' => 'Break',
1709 'diff-hr-article' => 'A',
1710 'diff-hr' => 'Horizontal rule',
1711 'diff-code-article' => 'A',
1712 'diff-code' => 'Computer code block',
1713 'diff-dl-article' => 'A',
1714 'diff-dl' => 'Definition list',
1715 'diff-dt-article' => 'A',
1716 'diff-dt' => 'Definition term',
1717 'diff-dd-article' => 'A',
1718 'diff-dd' => 'Definition',
1719 'diff-input-article' => 'An',
1720 'diff-input' => 'Input',
1721 'diff-form-article' => 'A',
1722 'diff-form' => 'Form',
1723 'diff-img-article' => 'An',
1724 'diff-img' => 'Image',
1725 'diff-span-article' => 'A',
1726 'diff-span' => 'Span',
1727 'diff-a-article' => 'A',
1728 'diff-a' => 'Link',
1729 'diff-i' => 'Italics',
1730 'diff-b' => 'Bold',
1731 'diff-strong' => 'Strong',
1732 'diff-em' => 'Emphasis',
1733 'diff-font' => 'Font',
1734 'diff-big' => 'Big',
1735 'diff-del' => 'Deleted',
1736 'diff-tt' => 'Fixed width',
1737 'diff-sub' => 'Subscript',
1738 'diff-sup' => 'Superscript',
1739 'diff-strike' => 'Strikethrough'
1740 );
1741
1742 public function getString($key) {
1743 return self::$bundle[$key];
1744 }
1745 }
1746
1747 class NoContentTagToString extends TagToString {
1748
1749 function __construct(TagNode $node, $sem) {
1750 parent::__construct($node, $sem);
1751 }
1752
1753 public function getAddedDescription(ChangeText $txt) {
1754 $txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' ');
1755 $txt.addHtml('<b>');
1756 $txt.addText(strtolower($this->getDescription()));
1757 $txt.addHtml('</b>');
1758
1759 $this->addAttributes($txt, $this->node->getAttributes());
1760 $txt.addText('.');
1761 }
1762
1763 private function getChangedTo() {
1764 return $this->getString('diff-changedto');
1765 }
1766
1767 public function getRemovedDescription(ChangeText $txt) {
1768 $txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' ');
1769 $txt.addHtml('<b>');
1770 $txt.addText(strtolower($this->getDescription()));
1771 $txt.addHtml('</b>');
1772
1773 $this->addAttributes($txt, $this->node->getAttributes());
1774 $txt.addText('.');
1775 }
1776
1777 private function getChangedFrom() {
1778 return $this->getString('diff-changedfrom');
1779 }
1780 }
1781
1782 class AnchorToString extends TagToString {
1783
1784 function __construct(TagNode $node, $sem) {
1785 parent::__construct($node, $sem);
1786 }
1787
1788 protected function addAttributes(ChangeText $txt, array $attributes) {
1789 if (array_key_exists('href',$attributes)) {
1790 $txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']);
1791 unset($attributes['href']);
1792 }
1793 parent::addAttributes($txt, $attributes);
1794 }
1795
1796 private function getWithDestination() {
1797 return $this->getString('diff-withdestination');
1798 }
1799
1800 }
1801
1802 /**
1803 * Takes a branch root and creates an HTML file for it.
1804 */
1805 class HTMLOutput{
1806
1807 private $prefix;
1808 private $handler;
1809
1810 function __construct($prefix, $handler) {
1811 $this->prefix = $prefix;
1812 $this->handler = $handler;
1813 }
1814
1815 public function parse(TagNode $node) {
1816
1817 if (0 != strcasecmp($node->getQName(),'img') && 0 != strcasecmp($node->getQName(),'body')) {
1818 $this->handler->startElement($node->getQName(), $node->getAttributes());
1819 }
1820
1821 $newStarted = false;
1822 $remStarted = false;
1823 $changeStarted = false;
1824 $changeTXT = '';
1825
1826 foreach ($node->children as $child) {
1827 if ($child instanceof TagNode) {
1828 if ($newStarted) {
1829 $this->handler->endElement('span');
1830 $newStarted = false;
1831 } else if ($changeStarted) {
1832 $this->handler->endElement('span');
1833 $changeStarted = false;
1834 } else if ($remStarted) {
1835 $this->handler->endElement('span');
1836 $remStarted = false;
1837 }
1838 $this->parse($child);
1839 } else if ($child instanceof TextNode) {
1840 $mod = $child->getModification();
1841
1842 if ($newStarted && ($mod->getType() != Modification::ADDED || $mod->isFirstOfID())) {
1843 $this->handler->endElement('span');
1844 $newStarted = false;
1845 } else if ($changeStarted && ($mod->getType() != Modification::CHANGED || $mod->getChanges() != $changeTXT || $mod->isFirstOfID())) {
1846 $this->handler->endElement('span');
1847 $changeStarted = false;
1848 } else if ($remStarted && ($mod->getType() != Modification::REMOVED || $mod ->isFirstOfID())) {
1849 $this->handler->endElement('span');
1850 $remStarted = false;
1851 }
1852
1853 // no else because a removed part can just be closed and a new
1854 // part can start
1855 if (!$newStarted && $mod->getType() == Modification::ADDED) {
1856 $attrs = array('class'=>'diff-html-added');
1857 if ($mod->isFirstOfID()) {
1858 $attrs['id'] = 'added-' . $this->prefix . '-' . $mod->getID();
1859 }
1860 $this->addAttributes($mod, $attrs);
1861 $attrs['onclick'] = 'return tipA(constructToolTipA(this));';
1862 $this->handler->startElement('span', $attrs);
1863 $newStarted = true;
1864 } else if (!$changeStarted && $mod->getType() == Modification::CHANGED) {
1865 $attrs = array('class'=>'diff-html-changed');
1866 if ($mod->isFirstOfID()) {
1867 $attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->getID();
1868 }
1869 $this->addAttributes($mod, $attrs);
1870 $attrs['onclick'] = 'return tipC(constructToolTipC(this));';
1871 $this->handler->startElement('span', $attrs);
1872
1873 //tooltip
1874 $this->handler->startElement('span', array('class'=>'tip'));
1875 $this->handler->characters($mod->getChanges());
1876 $this->handler->endElement('span');
1877
1878 $changeStarted = true;
1879 $changeTXT = $mod->getChanges();
1880 } else if (!$remStarted && $mod->getType() == Modification::REMOVED) {
1881 $attrs = array('class'=>'diff-html-removed');
1882 if ($mod->isFirstOfID()) {
1883 $attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->getID();
1884 }
1885 $this->addAttributes($mod, $attrs);
1886 $attrs['onclick'] = 'return tipR(constructToolTipR(this));';
1887 $this->handler->startElement('span', $attrs);
1888 $remStarted = true;
1889 }
1890
1891 $chars = $child->getText();
1892
1893 if ($child instanceof ImageNode) {
1894 $this->writeImage($child);
1895 } else {
1896 $this->handler->characters($chars);
1897 }
1898
1899 }
1900 }
1901
1902 if ($newStarted) {
1903 $this->handler->endElement('span');
1904 $newStarted = false;
1905 } else if ($changeStarted) {
1906 $this->handler->endElement('span');
1907 $changeStarted = false;
1908 } else if ($remStarted) {
1909 $this->handler->endElement('span');
1910 $remStarted = false;
1911 }
1912
1913 if (0 != strcasecmp($node->getQName(),'img')
1914 && 0 != strcasecmp($node->getQName(),'body'))
1915 $this->handler->endElement($node->getQName());
1916 }
1917
1918 private function writeImage(ImageNode $imgNode){
1919 $attrs = $imgNode->getAttributes();
1920 if ($imgNode->getModification()->getType() == Modification::REMOVED)
1921 $attrs['changeType']='diff-removed-image';
1922 else if ($imgNode->getModification()->getType() == Modification::ADDED)
1923 $attrs['changeType'] = 'diff-added-image';
1924 $attrs['onload'] = 'updateOverlays()';
1925 $attrs['onError'] = 'updateOverlays()';
1926 $attrs['onAbort'] = 'updateOverlays()';
1927 $this->handler->startElement('img', $attrs);
1928 $this->handler->endElement('img');
1929 }
1930
1931 private function addAttributes(Modification $mod, /*array*/ &$attrs) {
1932 if (is_null($mod->getPrevious())) {
1933 $previous = 'first-' . $this->prefix;
1934 } else {
1935 $previous = Modification::typeToString($mod->getPrevious()->getType()) . '-' . $this->prefix . '-'
1936 . $mod->getPrevious()->getID();
1937 }
1938 $attrs['previous'] = $previous;
1939
1940 $changeId = Modification::typeToString($mod->getType()) . '-' + $this->prefix . '-' . $mod->getID();
1941 $attrs['changeId'] = $changeId;
1942
1943 if (is_null($mod->getNext())) {
1944 $next = 'last-' . $this->prefix;
1945 } else {
1946 $next = Modification::typeToString($mod->getNext()->getType()) . '-' . $this->prefix . '-'
1947 . $mod->getNext()->getID();
1948 }
1949 $attrs['next'] = $next;
1950 }
1951 }
1952
1953 class EchoingContentHandler{
1954
1955 function startElement($qname, /*array*/ $arguments){
1956 echo '<'.$qname;
1957 foreach($arguments as $key => $value){
1958 echo ' '.$key.'="'.Sanitizer::encodeAttribute($value).'"';
1959 }
1960 echo '>';
1961 }
1962
1963 function endElement($qname){
1964 echo '</'.$qname.'>';
1965 }
1966
1967 function characters($chars){
1968 echo $chars;
1969 }
1970
1971 }
1972
1973 class DelegatingContentHandler{
1974
1975 private $delegate;
1976
1977 function __construct($delegate){
1978 $this->delegate=$delegate;
1979 }
1980
1981 function startElement($qname, /*array*/ $arguments){
1982 $this->delegate->addHtml('<'.$qname) ;
1983 foreach($arguments as $key => $value){
1984 $this->delegate->addHtml(' '.$key.'="'. Sanitizer::encodeAttribute($value) .'"');
1985 }
1986 $this->delegate->addHtml('>');
1987 }
1988
1989 function endElement($qname){
1990 $this->delegate->addHtml('</'.$qname.'>');
1991 }
1992
1993 function characters($chars){
1994 $this->delegate->addHtml( $chars );
1995 }
1996
1997 }