2 /* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * or see http://www.gnu.org/
21 * Any element in the DOM tree of an HTML document.
27 protected $parentTree;
29 public $whiteBefore = false;
31 public $whiteAfter = false;
33 function __construct($parent) {
34 $this->parent
= $parent;
37 public function getParentTree() {
38 if (!isset($this->parentTree
)) {
39 if (!is_null($this->parent
)) {
40 $this->parentTree
= $this->parent
->getParentTree();
41 $this->parentTree
[] = $this->parent
;
43 $this->parentTree
= array();
46 return $this->parentTree
;
49 public function getLastCommonParent(Node
$other) {
50 $result = new LastCommonParentResult();
52 $myParents = $this->getParentTree();
53 $otherParents = $other->getParentTree();
57 $nbMyParents = count($myParents);
58 $nbOtherParents = count($otherParents);
59 while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) {
60 if (!$myParents[$i]->openingTag
=== $otherParents[$i]->openingTag
) {
63 // After a while, the index i-1 must be the last common parent
68 $result->lastCommonParentDepth
= $i - 1;
69 $result->parent
= $myParents[$i - 1];
71 if (!$isSame ||
$nbMyParents > $nbOtherParents) {
72 // Not all tags matched, or all tags matched but
73 // there are tags left in this tree
74 $result->indexInLastCommonParent
= $myParents[$i - 1]->getIndexOf($myParents[$i]);
75 $result->splittingNeeded
= true;
76 } else if ($nbMyParents <= $nbOtherParents) {
77 $result->indexInLastCommonParent
= $myParents[$i - 1]->getIndexOf($this);
82 public function setParent($parent) {
83 $this->parent
= $parent;
84 unset($this->parentTree
);
87 public function inPre() {
88 $tree = $this->getParentTree();
89 foreach ($tree as &$ancestor) {
90 if ($ancestor->isPre()) {
99 * Node that can contain other nodes. Represents an HTML tag.
101 class TagNode
extends Node
{
103 public $children = array();
107 public $attributes = array();
111 function __construct($parent, $qName, /*array*/ $attributes) {
112 parent
::__construct($parent);
113 $this->qName
= strtolower($qName);
114 foreach($attributes as $key => &$value){
115 $this->attributes
[strtolower($key)] = $value;
117 return $this->openingTag
= Xml
::openElement($this->qName
, $this->attributes
);
120 public function addChildAbsolute(Node
$node, $index) {
121 array_splice($this->children
, $index, 0, array($node));
124 public function getIndexOf(Node
$child) {
125 // don't trust array_search with objects
126 foreach ($this->children
as $key => &$value){
127 if ($value === $child) {
134 public function getNbChildren() {
135 return count($this->children
);
138 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
142 $somethingDeleted = false;
143 $hasNonDeletedDescendant = false;
145 if (empty($this->children
)) {
149 foreach ($this->children
as &$child) {
150 $allDeleted_local = false;
151 $somethingDeleted_local = false;
152 $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local);
153 if ($somethingDeleted_local) {
154 $nodes = array_merge($nodes, $childrenChildren);
155 $somethingDeleted = true;
157 if (!$allDeleted_local) {
158 $hasNonDeletedDescendant = true;
161 if (!$hasNonDeletedDescendant) {
162 $nodes = array($this);
168 public function splitUntil(TagNode
$parent, Node
$split, $includeLeft) {
169 $splitOccured = false;
170 if ($parent !== $this) {
171 $part1 = new TagNode(null, $this->qName
, $this->attributes
);
172 $part2 = new TagNode(null, $this->qName
, $this->attributes
);
173 $part1->setParent($this->parent
);
174 $part2->setParent($this->parent
);
178 foreach ($this->children
as &$child)
180 if ($child === $split) {
183 if(!$pastSplit ||
($onSplit && $includeLeft)) {
184 $child->setParent($part1);
185 $part1->children
[] = $child;
187 $child->setParent($part2);
188 $part2->children
[] = $child;
195 $myindexinparent = $this->parent
->getIndexOf($this);
196 if (!empty($part1->children
)) {
197 $this->parent
->addChildAbsolute($part1, $myindexinparent);
199 if (!empty($part2->children
)) {
200 $this->parent
->addChildAbsolute($part2, $myindexinparent);
202 if (!empty($part1->children
) && !empty($part2->children
)) {
203 $splitOccured = true;
206 $this->parent
->removeChild($myindexinparent);
209 $this->parent
->splitUntil($parent, $part1, $includeLeft);
211 $this->parent
->splitUntil($parent, $part2, $includeLeft);
214 return $splitOccured;
218 private function removeChild($index) {
219 unset($this->children
[$index]);
220 $this->children
= array_values($this->children
);
223 public static $blocks = array('html', 'body','p','blockquote', 'h1',
224 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table',
225 'tbody', 'tr', 'td', 'th', 'br');
227 public function copyTree() {
228 $newThis = new TagNode(null, $this->qName
, $this->attributes
);
229 $newThis->whiteBefore
= $this->whiteBefore
;
230 $newThis->whiteAfter
= $this->whiteAfter
;
231 foreach ($this->children
as &$child) {
232 $newChild = $child->copyTree();
233 $newChild->setParent($newThis);
234 $newThis->children
[] = $newChild;
239 public function getMatchRatio(TagNode
$other) {
240 $txtComp = new TextOnlyComparator($other);
241 return $txtComp->getMatchRatio(new TextOnlyComparator($this));
244 public function expandWhiteSpace() {
248 $nbOriginalChildren = $this->getNbChildren();
249 for ($i = 0; $i < $nbOriginalChildren; ++
$i) {
250 $child = $this->children
[$i +
$shift];
252 if ($child instanceof TagNode
) {
253 if (!$child->isPre()) {
254 $child->expandWhiteSpace();
257 if (!$spaceAdded && $child->whiteBefore
) {
258 $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild());
259 $ws->setParent($this);
260 $this->addChildAbsolute($ws,$i +
($shift++
));
262 if ($child->whiteAfter
) {
263 $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild());
264 $ws->setParent($this);
265 $this->addChildAbsolute($ws,$i +
1 +
($shift++
));
274 public function getLeftMostChild() {
275 if (empty($this->children
)) {
278 return $this->children
[0]->getLeftMostChild();
281 public function getRightMostChild() {
282 if (empty($this->children
)) {
285 return $this->children
[$this->getNbChildren() - 1]->getRightMostChild();
288 public function isPre() {
289 return 0 == strcasecmp($this->qName
,'pre');
292 public static function toDiffLine(TagNode
$node) {
293 return $node->openingTag
;
298 * Represents a piece of text in the HTML file.
300 class TextNode
extends Node
{
304 public $modification;
306 function __construct($parent, $text) {
307 parent
::__construct($parent);
308 $this->modification
= new Modification(Modification
::NONE
);
312 public function copyTree() {
313 $clone = clone $this;
314 $clone->setParent(null);
318 public function getLeftMostChild() {
322 public function getRightMostChild() {
326 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
327 if ($this->modification
->type
== Modification
::REMOVED
328 && $this->modification
->id
== $id){
329 $somethingDeleted = true;
336 public function isSameText($other) {
337 if (is_null($other) ||
! $other instanceof TextNode
) {
340 return str_replace('\n', ' ',$this->text
) === str_replace('\n', ' ',$other->text
);
343 public static function toDiffLine(TextNode
$node) {
344 return str_replace('\n', ' ',$node->text
);
348 class WhiteSpaceNode
extends TextNode
{
350 function __construct($parent, $s, Node
$like = null) {
351 parent
::__construct($parent, $s);
352 if(!is_null($like) && $like instanceof TextNode
) {
353 $newModification = clone $like->modification
;
354 $newModification->firstOfID
= false;
355 $this->modification
= $newModification;
361 * Represents the root of a HTML document.
363 class BodyNode
extends TagNode
{
365 function __construct() {
366 parent
::__construct(null, 'body', array());
369 public function copyTree() {
370 $newThis = new BodyNode();
371 foreach ($this->children
as &$child) {
372 $newChild = $child->copyTree();
373 $newChild->setParent($newThis);
374 $newThis->children
[] = $newChild;
379 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
381 foreach ($this->children
as &$child) {
382 $childrenChildren = $child->getMinimalDeletedSet($id,
383 $allDeleted, $somethingDeleted);
384 $nodes = array_merge($nodes, $childrenChildren);
392 * Represents an image in HTML. Even though images do not contain any text they
393 * are independent visible objects on the page. They are logically a TextNode.
395 class ImageNode
extends TextNode
{
399 function __construct(TagNode
$parent, /*array*/ $attrs) {
400 if(!array_key_exists('src', $attrs)) {
401 //wfDebug('Image without a source:');
402 //foreach ($attrs as $key => &$value) {
403 //wfDebug("$key = $value");
405 parent
::__construct($parent, '<img></img>');
407 parent
::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
409 $this->attributes
= $attrs;
412 public function isSameText($other) {
413 if (is_null($other) ||
! $other instanceof ImageNode
) {
416 return $this->text
=== $other->text
;
421 class DummyNode
extends Node
{
423 function __construct() {
430 * When detecting the last common parent of two nodes, all results are stored as
431 * a LastCommonParentResult.
433 class LastCommonParentResult
{
439 public $splittingNeeded = false;
442 public $lastCommonParentDepth = -1;
445 public $indexInLastCommonParent = -1;
459 public $firstOfID = false;
463 function __construct($type) {
467 public static function typeToString($type) {
469 case self
::NONE
: return 'none';
470 case self
::REMOVED
: return 'removed';
471 case self
::ADDED
: return 'added';
472 case self
::CHANGED
: return 'changed';
477 class DomTreeBuilder
{
479 public $textNodes = array();
483 private $currentParent;
485 private $newWord = '';
487 protected $bodyStarted = false;
489 protected $bodyEnded = false;
491 private $whiteSpaceBeforeThis = false;
493 private $lastSibling;
495 private $notInPre = true;
497 function __construct() {
498 $this->bodyNode
= $this->currentParent
= new BodyNode();
499 $this->lastSibling
= new DummyNode();
503 * Must be called manually
505 public function endDocument() {
507 //wfDebug(count($this->textNodes) . ' text nodes in document.');
510 public function startElement($parser, $name, /*array*/ $attributes) {
511 if (strcasecmp($name, 'body') != 0) {
512 //wfDebug("Starting $name node.");
515 $newNode = new TagNode($this->currentParent
, $name, $attributes);
516 $this->currentParent
->children
[] = $newNode;
517 $this->currentParent
= $newNode;
518 $this->lastSibling
= new DummyNode();
519 if ($this->whiteSpaceBeforeThis
&& !in_array(strtolower($this->currentParent
->qName
),TagNode
::$blocks)) {
520 $this->currentParent
->whiteBefore
= true;
522 $this->whiteSpaceBeforeThis
= false;
523 if(strcasecmp($name, 'pre') == 0) {
524 $this->notInPre
= false;
529 public function endElement($parser, $name) {
530 if(strcasecmp($name, 'body') != 0) {
531 //wfDebug("Ending $name node.");
532 if (0 == strcasecmp($name,'img')) {
533 // Insert a dummy leaf for the image
534 $img = new ImageNode($this->currentParent
, $this->currentParent
->attributes
);
535 $this->currentParent
->children
[] = $img;
536 $img->whiteBefore
= $this->whiteSpaceBeforeThis
;
537 $this->lastSibling
= $img;
538 $this->textNodes
[] = $img;
541 if (!in_array(strtolower($this->currentParent
->qName
),TagNode
::$blocks)) {
542 $this->lastSibling
= $this->currentParent
;
544 $this->lastSibling
= new DummyNode();
546 $this->currentParent
= $this->currentParent
->parent
;
547 $this->whiteSpaceBeforeThis
= false;
548 if (!$this->notInPre
&& strcasecmp($name, 'pre') == 0) {
549 $this->notInPre
= true;
552 $this->endDocument();
556 const regex
= '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1})/';
557 const whitespace
= '/^[\s]{1}$/';
558 const delimiter
= '/^[\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1}$/';
560 public function characters($parser, $data) {
561 $matches = preg_split(self
::regex
, $data, -1, PREG_SPLIT_DELIM_CAPTURE
);
563 foreach($matches as &$word) {
564 if (preg_match(self
::whitespace
, $word) && $this->notInPre
) {
566 $this->lastSibling
->whiteAfter
= true;
567 $this->whiteSpaceBeforeThis
= true;
568 } else if (preg_match(self
::delimiter
, $word)) {
570 $textNode = new TextNode($this->currentParent
, $word);
571 $this->currentParent
->children
[] = $textNode;
572 $textNode->whiteBefore
= $this->whiteSpaceBeforeThis
;
573 $this->whiteSpaceBeforeThis
= false;
574 $this->lastSibling
= $textNode;
575 $this->textNodes
[] = $textNode;
577 $this->newWord
.= $word;
582 private function endWord() {
583 if ($this->newWord
!== '') {
584 $node = new TextNode($this->currentParent
, $this->newWord
);
585 $this->currentParent
->children
[] = $node;
586 $node->whiteBefore
= $this->whiteSpaceBeforeThis
;
587 $this->whiteSpaceBeforeThis
= false;
588 $this->lastSibling
= $node;
589 $this->textNodes
[] = $node;
594 public function getDiffLines() {
595 return array_map(array('TextNode','toDiffLine'), $this->textNodes
);
599 class TextNodeDiffer
{
604 private $oldTextNodes;
605 private $oldBodyNode;
609 private $changedID = 0;
611 private $changedIDUsed = false;
613 // used to remove the whitespace between a red and green block
614 private $whiteAfterLastChangedPart = false;
616 private $deletedID = 0;
618 function __construct(DomTreeBuilder
$tree, DomTreeBuilder
$oldTree) {
619 $this->textNodes
= $tree->textNodes
;
620 $this->bodyNode
= $tree->bodyNode
;
621 $this->oldTextNodes
= $oldTree->textNodes
;
622 $this->oldBodyNode
= $oldTree->bodyNode
;
625 public function markAsNew($start, $end) {
626 if ($end <= $start) {
630 if ($this->whiteAfterLastChangedPart
) {
631 $this->textNodes
[$start]->whiteBefore
= false;
634 for ($i = $start; $i < $end; ++
$i) {
635 $mod = new Modification(Modification
::ADDED
);
636 $mod->id
= $this->newID
;
637 $this->textNodes
[$i]->modification
= $mod;
640 $this->textNodes
[$start]->modification
->firstOfID
= true;
645 public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
649 if ($this->changedIDUsed
) {
651 $this->changedIDUsed
= false;
655 while ($i < $rightend) {
656 $acthis = new AncestorComparator($this->textNodes
[$i]->getParentTree());
657 $acother = new AncestorComparator($this->oldTextNodes
[$j]->getParentTree());
658 $result = $acthis->getResult($acother);
659 unset($acthis, $acother);
661 if ($result->changed
) {
662 $mod = new Modification(Modification
::CHANGED
);
664 if (!$this->changedIDUsed
) {
665 $mod->firstOfID
= true;
666 } else if (!is_null($result->changes
) && $result->changes
!== $this->changes
) {
668 $mod->firstOfID
= true;
671 $mod->changes
= $result->changes
;
672 $mod->id
= $this->changedID
;
674 $this->textNodes
[$i]->modification
= $mod;
675 $this->changes
= $result->changes
;
676 $this->changedIDUsed
= true;
677 } else if ($this->changedIDUsed
) {
679 $this->changedIDUsed
= false;
686 public function markAsDeleted($start, $end, $before) {
688 if ($end <= $start) {
692 if ($before > 0 && $this->textNodes
[$before - 1]->whiteAfter
) {
693 $this->whiteAfterLastChangedPart
= true;
695 $this->whiteAfterLastChangedPart
= false;
698 for ($i = $start; $i < $end; ++
$i) {
699 $mod = new Modification(Modification
::REMOVED
);
700 $mod->id
= $this->deletedID
;
702 // oldTextNodes is used here because we're going to move its deleted
703 // elements to this tree!
704 $this->oldTextNodes
[$i]->modification
= $mod;
706 $this->oldTextNodes
[$start]->modification
->firstOfID
= true;
708 $root = $this->oldTextNodes
[$start]->getLastCommonParent($this->oldTextNodes
[$end-1])->parent
;
710 $junk1 = $junk2 = null;
711 $deletedNodes = $root->getMinimalDeletedSet($this->deletedID
, $junk1, $junk2);
713 //wfDebug("Minimal set of deleted nodes of size " . count($deletedNodes));
715 // Set prevLeaf to the leaf after which the old HTML needs to be
718 $prevLeaf = $this->textNodes
[$before - 1];
720 // Set nextLeaf to the leaf before which the old HTML needs to be
722 if ($before < count($this->textNodes
)) {
723 $nextLeaf = $this->textNodes
[$before];
726 while (count($deletedNodes) > 0) {
727 if (isset($prevLeaf)) {
728 $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
730 $prevResult = new LastCommonParentResult();
731 $prevResult->parent
= $this->bodyNode
;
732 $prevResult->indexInLastCommonParent
= 0;
734 if (isset($nextleaf)) {
735 $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[count($deletedNodes) - 1]);
737 $nextResult = new LastCommonParentResult();
738 $nextResult->parent
= $this->bodyNode
;
739 $nextResult->indexInLastCommonParent
= $this->bodyNode
->getNbChildren();
742 if ($prevResult->lastCommonParentDepth
== $nextResult->lastCommonParentDepth
) {
743 // We need some metric to choose which way to add-...
744 if ($deletedNodes[0]->parent
=== $deletedNodes[count($deletedNodes) - 1]->parent
745 && $prevResult->parent
=== $nextResult->parent
) {
746 // The difference is not in the parent
747 $prevResult->lastCommonParentDepth
= $prevResult->lastCommonParentDepth +
1;
749 // The difference is in the parent, so compare them
750 // now THIS is tricky
751 $distancePrev = $deletedNodes[0]->parent
->getMatchRatio($prevResult->parent
);
752 $distanceNext = $deletedNodes[count($deletedNodes) - 1]->parent
->getMatchRatio($nextResult->parent
);
754 if ($distancePrev <= $distanceNext) {
755 $prevResult->lastCommonParentDepth
= $prevResult->lastCommonParentDepth +
1;
757 $nextResult->lastCommonParentDepth
= $nextResult->lastCommonParentDepth +
1;
763 if ($prevResult->lastCommonParentDepth
> $nextResult->lastCommonParentDepth
) {
764 // Inserting at the front
765 if ($prevResult->splittingNeeded
) {
766 $prevLeaf->parent
->splitUntil($prevResult->parent
, $prevLeaf, true);
768 $prevLeaf = $deletedNodes[0]->copyTree();
769 unset($deletedNodes[0]);
770 $deletedNodes = array_values($deletedNodes);
771 $prevLeaf->setParent($prevResult->parent
);
772 $prevResult->parent
->addChildAbsolute($prevLeaf,$prevResult->indexInLastCommonParent +
1);
773 } else if ($prevResult->lastCommonParentDepth
< $nextResult->lastCommonParentDepth
) {
774 // Inserting at the back
775 if ($nextResult->splittingNeeded
) {
776 $splitOccured = $nextLeaf->parent
->splitUntil($nextResult->parent
, $nextLeaf, false);
778 // The place where to insert is shifted one place to the
780 $nextResult->indexInLastCommonParent
= $nextResult->indexInLastCommonParent +
1;
783 $nextLeaf = $deletedNodes[count(deletedNodes
) - 1]->copyTree();
784 unset($deletedNodes[count(deletedNodes
) - 1]);
785 $deletedNodes = array_values($deletedNodes);
786 $nextLeaf->setParent($nextResult->parent
);
787 $nextResult->parent
->addChildAbsolute($nextLeaf,$nextResult->indexInLastCommonParent
);
789 throw new Exception("Uh?");
795 public function expandWhiteSpace() {
796 $this->bodyNode
->expandWhiteSpace();
799 public function lengthNew(){
800 return count($this->textNodes
);
803 public function lengthOld(){
804 return count($this->oldTextNodes
);
812 function __construct($output) {
813 $this->output
= $output;
816 function htmlDiff($from, $to) {
817 wfProfileIn( __METHOD__
);
818 // Create an XML parser
819 $xml_parser = xml_parser_create('');
821 $domfrom = new DomTreeBuilder();
823 // Set the functions to handle opening and closing tags
824 xml_set_element_handler($xml_parser, array($domfrom, "startElement"), array($domfrom, "endElement"));
826 // Set the function to handle blocks of character data
827 xml_set_character_data_handler($xml_parser, array($domfrom, "characters"));
829 //wfDebug('Parsing '.strlen($from)." characters worth of HTML\n");
830 if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer
::hackDocType().'<body>', false)
831 ||
!xml_parse($xml_parser, $from, false)
832 ||
!xml_parse($xml_parser, '</body>', true)){
833 $error = xml_error_string(xml_get_error_code($xml_parser));
834 $line = xml_get_current_line_number($xml_parser);
835 wfDebug("XML error: $error at line $line\n");
837 xml_parser_free($xml_parser);
840 $xml_parser = xml_parser_create('');
842 $domto = new DomTreeBuilder();
844 // Set the functions to handle opening and closing tags
845 xml_set_element_handler($xml_parser, array($domto, "startElement"), array($domto, "endElement"));
847 // Set the function to handle blocks of character data
848 xml_set_character_data_handler($xml_parser, array($domto, "characters"));
850 //wfDebug('Parsing '.strlen($to)." characters worth of HTML\n");
851 if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer
::hackDocType().'<body>', false)
852 ||
!xml_parse($xml_parser, $to, false)
853 ||
!xml_parse($xml_parser, '</body>', true)){
854 $error = xml_error_string(xml_get_error_code($xml_parser));
855 $line = xml_get_current_line_number($xml_parser);
856 wfDebug("XML error: $error at line $line\n");
858 xml_parser_free($xml_parser);
861 $diffengine = new WikiDiff3();
862 $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
863 unset($xml_parser, $diffengine);
865 $domdiffer = new TextNodeDiffer($domto, $domfrom);
867 $currentIndexLeft = 0;
868 $currentIndexRight = 0;
869 foreach ($differences as &$d) {
870 if ($d->leftstart
> $currentIndexLeft) {
871 $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart
,
872 $currentIndexRight, $d->rightstart
);
874 if ($d->leftlength
> 0) {
875 $domdiffer->markAsDeleted($d->leftstart
, $d->leftend
, $d->rightstart
);
877 $domdiffer->markAsNew($d->rightstart
, $d->rightend
);
879 $currentIndexLeft = $d->leftend
;
880 $currentIndexRight = $d->rightend
;
882 $oldLength = $domdiffer->lengthOld();
883 if ($currentIndexLeft < $oldLength) {
884 $domdiffer->handlePossibleChangedPart($currentIndexLeft, $oldLength, $currentIndexRight, $domdiffer->lengthNew());
886 $domdiffer->expandWhiteSpace();
887 $output = new HTMLOutput('htmldiff', $this->output
);
888 $output->parse($domdiffer->bodyNode
);
889 wfProfileOut( __METHOD__
);
892 private function preProcess(/*array*/ $differences) {
893 $newRanges = array();
895 $nbDifferences = count($differences);
896 for ($i = 0; $i < $nbDifferences; ++
$i) {
897 $leftStart = $differences[$i]->leftstart
;
898 $leftEnd = $differences[$i]->leftend
;
899 $rightStart = $differences[$i]->rightstart
;
900 $rightEnd = $differences[$i]->rightend
;
902 $leftLength = $leftEnd - $leftStart;
903 $rightLength = $rightEnd - $rightStart;
905 while ($i +
1 < $nbDifferences && self
::score($leftLength,
906 $differences[$i +
1]->leftlength
,
908 $differences[$i +
1]->rightlength
)
909 > ($differences[$i +
1]->leftstart
- $leftEnd)) {
910 $leftEnd = $differences[$i +
1]->leftend
;
911 $rightEnd = $differences[$i +
1]->rightend
;
912 $leftLength = $leftEnd - $leftStart;
913 $rightLength = $rightEnd - $rightStart;
916 $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
922 * Heuristic to merge differences for readability.
924 public static function score($ll, $nll, $rl, $nrl) {
925 if (($ll == 0 && $nll == 0)
926 ||
($rl == 0 && $nrl == 0)) {
929 $numbers = array($ll, $nll, $rl, $nrl);
931 foreach ($numbers as &$number) {
932 while ($number > 3) {
940 return $d / (1.5 * count($numbers));
945 class TextOnlyComparator
{
947 public $leafs = array();
949 function _construct(TagNode
$tree) {
950 $this->addRecursive($tree);
951 $this->leafs
= array_map(array('TextNode','toDiffLine'), $this->leafs
);
954 private function addRecursive(TagNode
$tree) {
955 foreach ($tree->children
as &$child) {
956 if ($child instanceof TagNode
) {
957 $this->addRecursive($child);
958 } else if ($child instanceof TextNode
) {
959 $this->leafs
[] = $node;
964 public function getMatchRatio(TextOnlyComparator
$other) {
965 $nbOthers = count($other->leafs
);
966 $nbThis = count($this->leafs
);
967 if($nbOthers == 0 ||
$nbThis == 0){
971 $diffengine = new WikiDiff3(25000, 1.35);
972 $diffengine->diff($this->leafs
, $other->leafs
);
974 $lcsLength = $diffengine->getLcsLength();
976 $distanceThis = $nbThis-$lcsLength;
978 return (2.0 - $lcsLength/$nbOthers - $lcsLength/$nbThis) / 2.0;
982 class AncestorComparatorResult
{
984 public $changed = false;
986 public $changes = "";
990 * A comparator used when calculating the difference in ancestry of two Nodes.
992 class AncestorComparator
{
995 public $ancestorsText;
997 function __construct(/*array*/ $ancestors) {
998 $this->ancestors
= $ancestors;
999 $this->ancestorsText
= array_map(array('TagNode','toDiffLine'), $ancestors);
1002 public $compareTxt = "";
1004 public function getResult(AncestorComparator
$other) {
1005 $result = new AncestorComparatorResult();
1007 $diffengine = new WikiDiff3(10000, 1.35);
1008 $differences = $diffengine->diff_range($other->ancestorsText
,$this->ancestorsText
);
1010 if (count($differences) == 0){
1013 $changeTxt = new ChangeTextGenerator($this, $other);
1015 $result->changed
= true;
1016 $result->changes
= $changeTxt->getChanged($differences)->toString();
1022 class ChangeTextGenerator
{
1024 private $ancestorComparator;
1029 function __construct(AncestorComparator
$ancestorComparator, AncestorComparator
$other) {
1030 $this->ancestorComparator
= $ancestorComparator;
1031 $this->other
= $other;
1032 $this->factory
= new TagToStringFactory();
1035 public function getChanged(/*array*/ $differences) {
1036 $txt = new ChangeText
;
1037 $rootlistopened = false;
1038 if (count($differences) > 1) {
1039 $txt->addHtml('<ul class="changelist">');
1040 $rootlistopened = true;
1042 $nbDifferences = count($differences);
1043 for ($j = 0; $j < $nbDifferences; ++
$j) {
1044 $d = $differences[$j];
1045 $lvl1listopened = false;
1046 if ($rootlistopened) {
1047 $txt->addHtml('<li>');
1049 if ($d->leftlength +
$d->rightlength
> 1) {
1050 $txt->addHtml('<ul class="changelist">');
1051 $lvl1listopened = true;
1053 // left are the old ones
1054 for ($i = $d->leftstart
; $i < $d->leftend
; ++
$i) {
1055 if ($lvl1listopened){
1056 $txt->addHtml('<li>');
1058 // add a bullet for a old tag
1059 $this->addTagOld($txt, $this->other
->ancestors
[$i]);
1060 if ($lvl1listopened){
1061 $txt->addHtml('</li>');
1064 // right are the new ones
1065 for ($i = $d->rightstart
; $i < $d->rightend
; ++
$i) {
1066 if ($lvl1listopened){
1067 $txt->addHtml('<li>');
1069 // add a bullet for a new tag
1070 $this->addTagNew($txt, $this->ancestorComparator
->ancestors
[$i]);
1072 if ($lvl1listopened){
1073 $txt->addHtml('</li>');
1076 if ($lvl1listopened) {
1077 $txt->addHtml('</ul>');
1079 if ($rootlistopened) {
1080 $txt->addHtml('</li>');
1083 if ($rootlistopened) {
1084 $txt->addHtml('</ul>');
1089 private function addTagOld(ChangeText
$txt, TagNode
$ancestor) {
1090 $this->factory
->create($ancestor)->getRemovedDescription($txt);
1093 private function addTagNew(ChangeText
$txt, TagNode
$ancestor) {
1094 $this->factory
->create($ancestor)->getAddedDescription($txt);
1102 public function addHtml($s) {
1106 public function toString() {
1111 class TagToStringFactory
{
1113 private static $containerTags = array('html', 'body', 'p', 'blockquote',
1114 'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
1115 'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
1116 'dt', 'dd', 'input', 'form', 'img', 'span', 'a');
1118 private static $styleTags = array('i', 'b', 'strong', 'em', 'font',
1119 'big', 'del', 'tt', 'sub', 'sup', 'strike');
1125 public function create(TagNode
$node) {
1126 $sem = $this->getChangeSemantic($node->qName
);
1127 if (strcasecmp($node->qName
,'a') == 0) {
1128 return new AnchorToString($node, $sem);
1130 if (strcasecmp($node->qName
,'img') == 0) {
1131 return new NoContentTagToString($node, $sem);
1133 return new TagToString($node, $sem);
1136 protected function getChangeSemantic($qname) {
1137 if (in_array(strtolower($qname),self
::$containerTags)) {
1140 if (in_array(strtolower($qname),self
::$styleTags)) {
1143 return self
::UNKNOWN
;
1153 function __construct(TagNode
$node, $sem) {
1154 $this->node
= $node;
1158 public function getRemovedDescription(ChangeText
$txt) {
1159 $tagDescription = wfMsgExt('diff-' . $this->node
->qName
, 'parseinline' );
1160 if(!$tagDescription){
1161 $tagDescription = $this->node
->qName
;
1163 if ($this->sem
== TagToStringFactory
::MOVED
) {
1164 $txt->addHtml( wfMsgExt( 'diff-movedoutof', 'parseinline', $tagDescription ) );
1165 } else if ($this->sem
== TagToStringFactory
::STYLE
) {
1166 $txt->addHtml($tagDescription . ' ' . wfMsgExt( 'diff-styleremoved' , 'parseinline' ) );
1168 $txt->addHtml($tagDescription . ' ' . wfMsgExt( 'diff-removed' , 'parseinline' ) );
1170 $this->addAttributes($txt, $this->node
->attributes
);
1174 public function getAddedDescription(ChangeText
$txt) {
1175 $tagDescription = wfMsgExt('diff-' . $this->node
->qName
, 'parseinline' );
1176 if(!$tagDescription){
1177 $tagDescription = $this->node
->qName
;
1179 if ($this->sem
== TagToStringFactory
::MOVED
) {
1180 $txt->addHtml( wfMsgExt( 'diff-movedto' , 'parseinline', $tagDescription) );
1181 } else if ($this->sem
== TagToStringFactory
::STYLE
) {
1182 $txt->addHtml($tagDescription . ' ' . wfMsgExt( 'diff-styleadded', 'parseinline' ) );
1184 $txt->addHtml($tagDescription . ' ' . wfMsgExt( 'diff-added', 'parseinline' ) );
1186 $this->addAttributes($txt, $this->node
->attributes
);
1190 protected function addAttributes(ChangeText
$txt, array $attributes) {
1191 if (count($attributes) < 1) {
1195 $nbAttributes_min_1 = count($attributes)-1;
1196 $keys = array_keys($attributes);
1197 for ($i=0;$i<$nbAttributes_min_1;$i++
) {
1199 $attr = $attributes[$key];
1202 $txt->addHtml( wfMsgExt('diff-with', 'escapenoentities', $this->translateArgument($key), htmlspecialchars($attr) ) );
1205 $txt->addHtml( wfMsgExt( 'comma-separator', 'escapenoentities' ) .
1206 wfMsgExt( 'diff-with-additional', 'escapenoentities',
1207 $this->translateArgument( $key ), htmlspecialchars( $attr ) )
1211 if ($nbAttributes_min_1 > 0) {
1212 $txt->addHtml( wfMsgExt( 'diff-with-final', 'escapenoentities',
1213 $this->translateArgument($keys[$nbAttributes_min_1]),
1214 htmlspecialchars($attributes[$keys[$nbAttributes_min_1]]) ) );
1218 protected function translateArgument($name) {
1219 $translation = wfMsgExt('diff-' . $name, 'parseinline' );
1220 if ( wfEmptyMsg( 'diff-' . $name, $translation ) ) {
1221 $translation = $name;
1223 return htmlspecialchars( $translation );
1227 class NoContentTagToString
extends TagToString
{
1229 function __construct(TagNode
$node, $sem) {
1230 parent
::__construct($node, $sem);
1233 public function getAddedDescription(ChangeText
$txt) {
1234 $tagDescription = wfMsgExt('diff-' . $this->node
->qName
, 'parseinline' );
1235 if(!$tagDescription){
1236 $tagDescription = $this->node
->qName
;
1238 $txt->addHtml( wfMsgExt('diff-changedto', 'parseinline' ) . ' ' . $tagDescription);
1239 $this->addAttributes($txt, $this->node
->attributes
);
1243 public function getRemovedDescription(ChangeText
$txt) {
1244 $txt->addHtml( wfMsgExt('diff-changedfrom', 'parseinline' ) . ' ' . $tagDescription);
1245 $this->addAttributes($txt, $this->node
->attributes
);
1250 class AnchorToString
extends TagToString
{
1252 function __construct(TagNode
$node, $sem) {
1253 parent
::__construct($node, $sem);
1256 protected function addAttributes(ChangeText
$txt, array $attributes) {
1257 if (array_key_exists('href', $attributes)) {
1258 $txt->addHtml(' ' . wfMsgExt( 'diff-withdestination', 'parseinline' ) . ' ' . htmlspecialchars($attributes['href']));
1259 unset($attributes['href']);
1261 parent
::addAttributes($txt, $attributes);
1266 * Takes a branch root and creates an HTML file for it.
1273 function __construct($prefix, $handler) {
1274 $this->prefix
= $prefix;
1275 $this->handler
= $handler;
1278 public function parse(TagNode
$node) {
1279 $handler = &$this->handler
;
1281 if (strcasecmp($node->qName
, 'img') != 0 && strcasecmp($node->qName
, 'body') != 0) {
1282 $handler->startElement($node->qName
, $node->attributes
);
1285 $newStarted = false;
1286 $remStarted = false;
1287 $changeStarted = false;
1290 foreach ($node->children
as &$child) {
1291 if ($child instanceof TagNode
) {
1293 $handler->endElement('span');
1294 $newStarted = false;
1295 } else if ($changeStarted) {
1296 $handler->endElement('span');
1297 $changeStarted = false;
1298 } else if ($remStarted) {
1299 $handler->endElement('span');
1300 $remStarted = false;
1302 $this->parse($child);
1303 } else if ($child instanceof TextNode
) {
1304 $mod = $child->modification
;
1306 if ($newStarted && ($mod->type
!= Modification
::ADDED ||
$mod->firstOfID
)) {
1307 $handler->endElement('span');
1308 $newStarted = false;
1309 } else if ($changeStarted && ($mod->type
!= Modification
::CHANGED
1310 ||
$mod->changes
!= $changeTXT ||
$mod->firstOfID
)) {
1311 $handler->endElement('span');
1312 $changeStarted = false;
1313 } else if ($remStarted && ($mod->type
!= Modification
::REMOVED ||
$mod ->firstOfID
)) {
1314 $handler->endElement('span');
1315 $remStarted = false;
1318 // no else because a removed part can just be closed and a new
1320 if (!$newStarted && $mod->type
== Modification
::ADDED
) {
1321 $attrs = array('class' => 'diff-html-added');
1322 if ($mod->firstOfID
) {
1323 $attrs['id'] = "added-{$this->prefix}-{$mod->id}";
1325 $handler->startElement('span', $attrs);
1327 } else if (!$changeStarted && $mod->type
== Modification
::CHANGED
) {
1328 $attrs = array('class' => 'diff-html-changed');
1329 if ($mod->firstOfID
) {
1330 $attrs['id'] = "changed-{$this->prefix}-{$mod->id}";
1332 $handler->startElement('span', $attrs);
1335 $handler->startElement('span', array('class' => 'tip'));
1336 $handler->html($mod->changes
);
1337 $handler->endElement('span');
1339 $changeStarted = true;
1340 $changeTXT = $mod->changes
;
1341 } else if (!$remStarted && $mod->type
== Modification
::REMOVED
) {
1342 $attrs = array('class'=>'diff-html-removed');
1343 if ($mod->firstOfID
) {
1344 $attrs['id'] = "removed-{$this->prefix}-{$mod->id}";
1346 $handler->startElement('span', $attrs);
1350 $chars = $child->text
;
1352 if ($child instanceof ImageNode
) {
1353 $this->writeImage($child);
1355 $handler->characters($chars);
1361 $handler->endElement('span');
1362 $newStarted = false;
1363 } else if ($changeStarted) {
1364 $handler->endElement('span');
1365 $changeStarted = false;
1366 } else if ($remStarted) {
1367 $handler->endElement('span');
1368 $remStarted = false;
1371 if (strcasecmp($node->qName
, 'img') != 0
1372 && strcasecmp($node->qName
, 'body') != 0) {
1373 $handler->endElement($node->qName
);
1377 private function writeImage(ImageNode
$imgNode) {
1378 $attrs = $imgNode->attributes
;
1379 $this->handler
->startElement('img', $attrs);
1380 $this->handler
->endElement('img');
1384 class EchoingContentHandler
{
1386 function startElement($qname, /*array*/ $arguments) {
1387 echo Xml
::openElement($qname, $arguments);
1390 function endElement($qname){
1391 echo Xml
::closeElement($qname);
1394 function characters($chars){
1395 echo htmlspecialchars($chars);
1398 function html($html){
1404 class DelegatingContentHandler
{
1408 function __construct($delegate) {
1409 $this->delegate
= $delegate;
1412 function startElement($qname, /*array*/ $arguments) {
1413 $this->delegate
->addHtml(Xml
::openElement($qname, $arguments));
1416 function endElement($qname){
1417 $this->delegate
->addHtml(Xml
::closeElement($qname));
1420 function characters($chars){
1421 $this->delegate
->addHtml(htmlspecialchars($chars));
1424 function html($html){
1425 $this->delegate
->addHtml($html);