*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* or see http://www.gnu.org/
*/
/**
* Any element in the DOM tree of an HTML document.
*/
abstract class Node{
protected $parent;
function __construct($parent){
$this->parent = $parent;
if(!is_null($parent)){
$parent->addChild($this);
}
}
public function getParent(){
return $this->parent;
}
public function getParentTree(){
if(!is_null($this->parent)){
$parentTree = $this->parent->getParentTree();
$parentTree[] = $this->parent;
return $parentTree;
}else{
return array();
}
}
public abstract function getMinimalDeletedSet($id);
public function detectIgnorableWhiteSpace(){
// no op
}
public function getLastCommonParent(Node $other){
if(is_null($other))
throw new Exception('The given node is NULL');
$result = new LastCommonParentResult();
$myParents = $this->getParentTree();
$otherParents = $other->getParentTree();
$i = 1;
$isSame = true;
while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) {
if (!$myParents[$i]->isSameTag($otherParents[$i])) {
$isSame = false;
} else {
// After the while, the index i-1 must be the last common parent
$i++;
}
}
$result->setLastCommonParentDepth($i - 1);
$result->setLastCommonParent($myParents[$i - 1]);
if (!$isSame) {
$result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
$result->setSplittingNeeded();
} else if (sizeof($myParents) < sizeof($otherParents)) {
$result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
} else if (sizeof($myParents) > sizeof($otherParents)) {
// All tags matched but there are tags left in this tree
$result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
$result->setSplittingNeeded();
} else {
// All tags matched untill the very last one in both trees
// or there were no tags besides the BODY
$result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
}
return $result;
}
public function setParent($parent) {
$this->parent = $parent;
}
public abstract function copyTree();
public function inPre() {
$tree = $this->getParentTree();
foreach ($tree as $ancestor) {
if ($ancestor->isPre()) {
return true;
}
}
return false;
}
private $whiteBefore = false;
private $whiteAfter = false;
public function isWhiteBefore() {
return $this->whiteBefore;
}
public function setWhiteBefore($whiteBefore) {
$this->whiteBefore = $whiteBefore;
}
public function isWhiteAfter() {
return $this->whiteAfter;
}
public function setWhiteAfter($whiteAfter) {
$this->whiteAfter = $whiteAfter;
}
public abstract function getLeftMostChild();
public abstract function getRightMostChild();
}
/**
* Node that can contain other nodes. Represents an HTML tag.
*/
class TagNode extends Node{
public $children = array();
protected $qName;
protected $attributes = array();
function __construct($parent, $qName, /*array*/ $attributes) {
parent::__construct($parent);
$this->qName = strtolower($qName);
foreach($attributes as $key => $value){
$this->attributes[strtolower($key)] = $value;
}
}
public function addChild(Node $node, $index=-1) {
if ($node->getParent() !== $this)
throw new Exception(
'The new child must have this node as a parent.');
if($index == -1){
$this->children[] = $node;
}else{
array_splice($this->children,$index,0,array($node));
}
}
public function getIndexOf(Node $child) {
// don't trust array_search with objects
foreach($this->children as $key=>$value){
if($value === $child){
return $key;
}
}
return NULL;
}
public function getChild($i) {
return $this->children[$i];
}
public function getNbChildren() {
return count($this->children);
}
public function getQName() {
return $this->qName;
}
public function getAttributes() {
return $this->attributes;
}
public function isSameTag(TagNode $other) {
if (is_null($other))
return false;
return $this->getOpeningTag() === $other->getOpeningTag();
}
public function getOpeningTag() {
$s = '<'.$this->getQName();
foreach ($this->attributes as $attribute => $value) {
$s .= ' ' . $attribute . '="' . $value . '"';
}
return $s .= '>';
}
public function getEndTag() {
return '' . $this->getQName() + '>"';
}
public function getMinimalDeletedSet($id) {
$nodes = array();
if ($this->getNbChildren() == 0)
return $nodes;
$hasNotDeletedDescendant = false;
foreach ($this->children as $child) {
$childrenChildren = $child->getMinimalDeletedSet($id);
$nodes = array_merge($nodes, $childrenChildren);
if (!$hasNotDeletedDescendant
&& !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) {
// This child is not entirely deleted
$hasNotDeletedDescendant = true;
}
}
if (!$hasNotDeletedDescendant) {
$nodes = array($this);
}
return $nodes;
}
public function toString() {
return $this->getOpeningTag();
}
public function splitUntill(TagNode $parent, Node $split, $includeLeft) {
$splitOccured = false;
if ($parent !== $this) {
$part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
$part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
$part1->setParent($this->getParent());
$part2->setParent($this->getParent());
$i = 0;
$nbChildren = $this->getNbChildren();
while ($i < $nbChildren && $this->children[$i] !== $split) {
$this->children[$i]->setParent($part1);
$part1->addChild($this->children[$i]);
++$i;
}
if ($i < $nbChildren) {
if ($includeLeft) {
$this->children[$i]->setParent($part1);
$part1->addChild($this->children[$i]);
} else {
$this->children[$i]->setParent($part2);
$part2->addChild($this->children[$i]);
}
++$i;
}
while ($i < $nbChildren) {
$this->children[$i]->setParent($part2);
$part2->addChild($this->children[$i]);
++$i;
}
$myindexinparent = $this->parent->getIndexOf($this);
if ($part1->getNbChildren() > 0)
$this->parent->addChild($part1,$myindexinparent);
if ($part2->getNbChildren() > 0)
$this->parent->addChild($part2,$myindexinparent);
if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) {
$splitOccured = true;
}
$this->parent->removeChild($myindexinparent);
if ($includeLeft)
$this->parent->splitUntill($parent, $part1, $includeLeft);
else
$this->parent->splitUntill($parent, $part2, $includeLeft);
}
return $splitOccured;
}
private function removeChild($index) {
unset($this->children[$index]);
$this->children = array_values($this->children);
}
public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE,
'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE,
'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE);
public static function isBlockLevelName($qName) {
return array_key_exists(strtolower($qName),self::$blocks);
}
public static function isBlockLevelNode(Node $node) {
if(! $node instanceof TagNode)
return false;
return self::isBlockLevelName($node->getQName());
}
public function isBlockLevel() {
return self::isBlockLevelNode($this);
}
public static function isInlineName($qName) {
return !self::isBlockLevelName($qName);
}
public static function isInlineNode(Node $node) {
return !self::isBlockLevelNode($node);
}
public function isInline() {
return self::isInlineNode($this);
}
public function copyTree() {
$newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes());
$newThis->setWhiteBefore($this->isWhiteBefore());
$newThis->setWhiteAfter($this->isWhiteAfter());
foreach($this->children as $child) {
$newChild = $child->copyTree();
$newChild->setParent($newThis);
$newThis->addChild($newChild);
}
return $newThis;
}
public function getMatchRatio(TagNode $other) {
$txtComp = new TextOnlyComparator($other);
return $txtComp->getMatchRatio(new TextOnlyComparator($this));
}
public function expandWhiteSpace() {
$shift = 0;
$spaceAdded = false;
$nbOriginalChildren = $this->getNbChildren();
for ($i = 0; $i < $nbOriginalChildren; ++$i) {
$child = $this->getChild($i + $shift);
if($child instanceof TagNode){
if (!$child->isPre()) {
$child->expandWhiteSpace();
}
}
if (!$spaceAdded && $child->isWhiteBefore()) {
$ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild());
$ws->setParent($this);
$this->addChild($ws,$i + ($shift++));
}
if ($child->isWhiteAfter()) {
$ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild());
$ws->setParent($this);
$this->addChild($ws,$i + 1 + ($shift++));
$spaceAdded = true;
} else {
$spaceAdded = false;
}
}
}
public function getLeftMostChild() {
if ($this->getNbChildren() < 1)
return $this;
return $this->getChild(0)->getLeftMostChild();
}
public function getRightMostChild() {
if ($this->getNbChildren() < 1)
return $this;
return $this->getChild($this->getNbChildren() - 1)->getRightMostChild();
}
public function isPre() {
return 0 == strcasecmp($this->getQName(),'pre');
}
public static function toDiffLine(TagNode $node){
return $node->getOpeningTag();
}
}
/**
* Represents a piece of text in the HTML file.
*/
class TextNode extends Node{
private $s;
private $modification;
function __construct($parent, $s) {
parent::__construct($parent);
$this->modification = new Modification(Modification::NONE);
$this->s = $s;
}
public function copyTree() {
$clone = clone $this;
$clone->setParent(NULL);
return $clone;
}
public function getLeftMostChild() {
return $this;
}
public function getRightMostChild() {
return $this;
}
public function getMinimalDeletedSet($id) {
if ($this->getModification()->getType() == Modification::REMOVED
&& $this->getModification()->getID() == $id){
return array($this);
}
return array();
}
public function getModification() {
return $this->modification;
}
public function getText() {
return $this->s;
}
public function isSameText($other) {
if (is_null($other) || ! $other instanceof TextNode){
return false;
}
return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText());
}
public function setModification(Modification $m) {
//wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType()));
$this->modification = $m;
}
public function toString() {
return $this->getText();
}
public static function toDiffLine(TextNode $node){
return str_replace('\n', ' ',$node->getText());
}
}
class WhiteSpaceNode extends TextNode {
function __construct($parent, $s, Node $like = NULL) {
parent::__construct($parent, $s);
if(!is_null($like) && $like instanceof TextNode){
$newModification = clone $like->getModification();
$newModification->setFirstOfID(false);
$this->setModification($newModification);
}
}
public static function isWhiteSpace($c) {
switch ($c) {
case ' ':
case '\t':
case '\r':
case '\n':
return true;
default:
return false;
}
}
}
/**
* Represents the root of a HTML document.
*/
class BodyNode extends TagNode {
function __construct() {
parent::__construct(NULL, 'body', array());
}
public function copyTree() {
$newThis = new BodyNode();
foreach ($this->children as $child) {
$newChild = $child->copyTree();
$newChild->setParent($newThis);
$newThis->addChild($newChild);
}
return $newThis;
}
public function getMinimalDeletedSet($id) {
$nodes = array();
foreach ($this->children as $child) {
$childrenChildren = $child->getMinimalDeletedSet($id);
$nodes = array_merge($nodes, $childrenChildren);
}
return $nodes;
}
}
/**
* Represents an image in HTML. Even though images do not contain any text they
* are independent visible objects on the page. They are logically a TextNode.
*/
class ImageNode extends TextNode {
private $attributes;
function __construct(TagNode $parent, /*array*/ $attrs) {
if(!array_key_exists('src',$attrs)){
//wfDebug('Image without a source:');
foreach($attrs as $key => $value){
//wfDebug("$key = $value");
}
parent::__construct($parent, '');
}else{
parent::__construct($parent, '
' . strtolower($attrs['src']) . '');
}
$this->attributes = $attrs;
}
public function isSameText($other) {
if (is_null($other) || ! $other instanceof ImageNode)
return false;
return $this->getText() === $other->getText();
}
public function getAttributes() {
return $this->attributes;
}
}
/**
* When detecting the last common parent of two nodes, all results are stored as
* a LastCommonParentResult.
*/
class LastCommonParentResult {
// Parent
private $parent;
public function getLastCommonParent() {
return $this->parent;
}
public function setLastCommonParent(TagNode $parent) {
$this->parent = $parent;
}
// Splitting
private $splittingNeeded = false;
public function isSplittingNeeded() {
return $this->splittingNeeded;
}
public function setSplittingNeeded() {
$this->splittingNeeded = true;
}
// Depth
private $lastCommonParentDepth = -1;
public function getLastCommonParentDepth() {
return $this->lastCommonParentDepth;
}
public function setLastCommonParentDepth($depth) {
$this->lastCommonParentDepth = $depth;
}
// Index
private $indexInLastCommonParent = -1;
public function getIndexInLastCommonParent() {
return $this->indexInLastCommonParent;
}
public function setIndexInLastCommonParent($index) {
$this->indexInLastCommonParent = $index;
}
}
class Modification{
const NONE = 1;
const REMOVED = 2;
const ADDED = 4;
const CHANGED = 8;
private $type;
private $id = -1;
private $prevMod;
private $nextMod;
private $firstOfID = false;
function __construct($type) {
$this->type = $type;
}
public function copy() {
$newM = new Modification($this->getType());
$newM->setID($this->getID());
$newM->setChanges($this->getChanges());
$newM->setFirstOfID($this->isFirstOfID());
$newM->setNext($this->getNext());
$newM->setPrevious($this->getPrevious());
return $newM;
}
public function getType() {
return $this->type;
}
public function setID($id) {
$this->id = $id;
}
public function getID() {
return $this->id;
}
public function setPrevious($m) {
$this->prevMod = $m;
}
public function getPrevious() {
return $this->prevMod;
}
public function setNext($m) {
$this->nextMod = $m;
}
public function getNext() {
return $this->nextMod;
}
private $changes;
public function setChanges($changes) {
$this->changes = $changes;
}
public function getChanges() {
return $this->changes;
}
public function isFirstOfID() {
return $this->firstOfID;
}
public function setFirstOfID($firstOfID) {
$this->firstOfID = $firstOfID;
}
public static function typeToString($type){
switch($type){
case self::NONE: return 'none';
case self::REMOVED: return 'removed';
case self::ADDED: return 'added';
case self::CHANGED: return 'changed';
}
}
}
class DomTreeBuilder {
private $textNodes = array();
private $bodyNode;
private $currentParent;
private $newWord = "";
protected $bodyStarted = false;
protected $bodyEnded = false;
private $whiteSpaceBeforeThis = false;
private $lastSibling;
function __construct(){
$this->bodyNode = $this->currentParent = new BodyNode();
}
public function getBodyNode() {
return $this->bodyNode;
}
public function getTextNodes() {
return $this->textNodes;
}
/**
* Must be called manually
*/
public function endDocument() {
$this->endWord();
//wfDebug(sizeof($this->textNodes) . ' text nodes in document.');
}
public function startElement($parser, $name, /*array*/ $attributes) {
if(!strcasecmp($name, 'body')==0){
//wfDebug("Starting $name node.");
$this->endWord();
$newTagNode = new TagNode($this->currentParent, $name, $attributes);
$this->currentParent = $newTagNode;
$this->lastSibling = NULL;
if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) {
$newTagNode->setWhiteBefore(true);
}
$this->whiteSpaceBeforeThis = false;
}
}
public function endElement($parser, $name) {
if(!strcasecmp($name, 'body')==0){
//wfDebug("Ending $name node.");
if (0 == strcasecmp($name,'img')) {
// Insert a dummy leaf for the image
$img = new ImageNode($this->currentParent, $this->currentParent->getAttributes());
$img->setWhiteBefore($this->whiteSpaceBeforeThis);
$this->lastSibling = $img;
$this->textNodes[] = $img;
}
$this->endWord();
if ($this->currentParent->isInline()) {
$this->lastSibling = $this->currentParent;
} else {
$this->lastSibling = NULL;
}
$this->currentParent = $this->currentParent->getParent();
$this->whiteSpaceBeforeThis = false;
}else{
$this->endDocument();
}
}
public function characters($parser, $data){
//wfDebug('Parsing '. strlen($data).' characters.');
$array = str_split($data);
foreach($array as $c) {
if (self::isDelimiter($c)) {
$this->endWord();
if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre()
&& !$this->currentParent->inPre()) {
if (!is_null($this->lastSibling)){
$this->lastSibling->setWhiteAfter(true);
}
$this->whiteSpaceBeforeThis = true;
} else {
$textNode = new TextNode($this->currentParent, $c);
$textNode->setWhiteBefore($this->whiteSpaceBeforeThis);
$this->whiteSpaceBeforeThis = false;
$this->lastSibling = $textNode;
$this->textNodes[] = $textNode;
}
} else {
$this->newWord .= $c;
}
}
}
private function endWord() {
if (strlen($this->newWord) > 0) {
$node = new TextNode($this->currentParent, $this->newWord);
$node->setWhiteBefore($this->whiteSpaceBeforeThis);
$this->whiteSpaceBeforeThis = false;
$this->lastSibling = $node;
$this->textNodes[] = $node;
$this->newWord = "";
}
}
public static function isDelimiter($c) {
switch ($c) {
// Basic Delimiters
case '/':
case '.':
case '!':
case ',':
case ';':
case '?':
case '=':
case '\'':
case '"':
// Extra Delimiters
case '[':
case ']':
case '{':
case '}':
case '(':
case ')':
case '&':
case '|':
case '\\':
case '-':
case '_':
case '+':
case '*':
case ':':
return true;
default:
return WhiteSpaceNode::isWhiteSpace($c);
}
}
public function getDiffLines(){
return array_map(array('TextNode','toDiffLine'), $this->textNodes);
}
}
class TextNodeDiffer{
private $textNodes;
private $bodyNode;
private $oldTextNodes;
private $oldBodyNode;
private $lastModified = array();
function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
$this->textNodes = $tree->getTextNodes();
$this->bodyNode = $tree->getBodyNode();
$this->oldTextNodes = $oldTree->getTextNodes();
$this->oldBodyNode = $oldTree->getBodyNode();
}
public function getBodyNode() {
return $this->bodyNode;
}
private $newID = 0;
public function markAsNew($start, $end) {
if ($end <= $start)
return;
if ($this->whiteAfterLastChangedPart)
$this->textNodes[$start]->setWhiteBefore(false);
$nextLastModified = array();
for ($i = $start; $i < $end; ++$i) {
$mod = new Modification(Modification::ADDED);
$mod->setID($this->newID);
if (sizeof($this->lastModified) > 0) {
$mod->setPrevious($this->lastModified[0]);
if (is_null($this->lastModified[0]->getNext())) {
foreach ($this->lastModified as $lastMod) {
$lastMod->setNext($mod);
}
}
}
$nextLastModified[] = $mod;
$this->textNodes[$i]->setModification($mod);
}
if ($start < $end) {
$this->textNodes[$start]->getModification()->setFirstOfID(true);
}
++$this->newID;
$this->lastModified = $nextLastModified;
}
private $changedID = 0;
private $changedIDUsed = false;
public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
$i = $rightstart;
$j = $leftstart;
if ($this->changedIDUsed) {
++$this->changedID;
$this->changedIDUsed = false;
}
$nextLastModified = array();
$changes;
while ($i < $rightend) {
$acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
$acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
$result = $acthis->getResult($acother);
unset($acthis, $acother);
$nbLastModified = sizeof($this->lastModified);
if ($result->isChanged()) {
$mod = new Modification(Modification::CHANGED);
if (!$this->changedIDUsed) {
$mod->setFirstOfID(true);
if (sizeof($nextLastModified) > 0) {
$this->lastModified = $nextLastModified;
$nextLastModified = array();
}
} else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) {
++$this->changedID;
$mod->setFirstOfID(true);
if (sizeof($nextLastModified) > 0) {
$this->lastModified = $nextLastModified;
$nextLastModified = array();
}
}
if ($nbLastModified > 0) {
$mod->setPrevious($this->lastModified[0]);
if (is_null($this->lastModified[0]->getNext())) {
foreach ($this->lastModified as $lastMod) {
$lastMod->setNext($mod);
}
}
}
$nextLastModified[] = $mod;
$mod->setChanges($result->getChanges());
$mod->setID($this->changedID);
$this->textNodes[$i]->setModification($mod);
$this->changes = $result->getChanges();
$this->changedIDUsed = true;
} else if ($this->changedIDUsed) {
++$this->changedID;
$this->changedIDUsed = false;
}
++$i;
++$j;
}
if (sizeof($nextLastModified) > 0){
$this->lastModified = $nextLastModified;
}
}
// used to remove the whitespace between a red and green block
private $whiteAfterLastChangedPart = false;
private $deletedID = 0;
public function markAsDeleted($start, $end, $before) {
if ($end <= $start)
return;
if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) {
$this->whiteAfterLastChangedPart = true;
} else {
$this->whiteAfterLastChangedPart = false;
}
$nextLastModified = array();
for ($i = $start; $i < $end; ++$i) {
$mod = new Modification(Modification::REMOVED);
$mod->setID($this->deletedID);
if (sizeof($this->lastModified) > 0) {
$mod->setPrevious($this->lastModified[0]);
if (is_null($this->lastModified[0]->getNext())) {
foreach ($this->lastModified as $lastMod) {
$lastMod->setNext($mod);
}
}
}
$nextLastModified[] = $mod;
// oldTextNodes is used here because we're going to move its deleted
// elements
// to this tree!
$this->oldTextNodes[$i]->setModification($mod);
}
$this->oldTextNodes[$start]->getModification()->setFirstOfID(true);
$deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID);
//wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));
// Set prevLeaf to the leaf after which the old HTML needs to be
// inserted
if ($before > 0){
$prevLeaf = $this->textNodes[$before - 1];
}
// Set nextLeaf to the leaf before which the old HTML needs to be
// inserted
if ($before < sizeof($this->textNodes)){
$nextLeaf = $this->textNodes[$before];
}
while (sizeof($deletedNodes) > 0) {
if (isset($prevLeaf)) {
$prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
} else {
$prevResult = new LastCommonParentResult();
$prevResult->setLastCommonParent($this->getBodyNode());
$prevResult->setIndexInLastCommonParent(0);
}
if (isset($nextleaf)) {
$nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]);
} else {
$nextResult = new LastCommonParentResult();
$nextResult->setLastCommonParent($this->getBodyNode());
$nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren());
}
if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) {
// We need some metric to choose which way to add-...
if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent()
&& $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) {
// The difference is not in the parent
$prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
} else {
// The difference is in the parent, so compare them
// now THIS is tricky
$distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent());
$distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent());
if ($distancePrev <= $distanceNext) {
$prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
} else {
$nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1);
}
}
}
if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) {
// Inserting at the front
if ($prevResult->isSplittingNeeded()) {
$prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true);
}
$prevLeaf = $deletedNodes[0]->copyTree();
unset($deletedNodes[0]);
$deletedNodes = array_values($deletedNodes);
$prevLeaf->setParent($prevResult->getLastCommonParent());
$prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1);
} else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) {
// Inserting at the back
if ($nextResult->isSplittingNeeded()) {
$splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false);
if ($splitOccured) {
// The place where to insert is shifted one place to the
// right
$nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1);
}
}
$nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree();
unset($deletedNodes[sizeof(deletedNodes) - 1]);
$deletedNodes = array_values($deletedNodes);
$nextLeaf->setParent($nextResult->getLastCommonParent());
$nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent());
} else
throw new Exception("Uh?");
}
$this->lastModified = $nextLastModified;
++$this->deletedID;
}
public function expandWhiteSpace() {
$this->getBodyNode()->expandWhiteSpace();
}
public function lengthNew(){
return sizeof($this->textNodes);
}
public function lengthOld(){
return sizeof($this->oldTextNodes);
}
}
class HTMLDiffer{
private $output;
function __construct($output){
$this->output = $output;
}
function htmlDiff($from, $to){
// Create an XML parser
$xml_parser = xml_parser_create('');
$domfrom = new DomTreeBuilder();
// Set the functions to handle opening and closing tags
xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement"));
// Set the function to handle blocks of character data
xml_set_character_data_handler($xml_parser, array($domfrom,"characters"));
;
//wfDebug('Parsing '.strlen($from)." characters worth of HTML");
if (!xml_parse($xml_parser, ''.Sanitizer::hackDocType().'