3 /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * or see http://www.gnu.org/
23 * Any element in the DOM tree of an HTML document.
24 * @ingroup DifferenceEngine
30 protected $parentTree;
32 public $whiteBefore = false;
34 public $whiteAfter = false;
36 function __construct($parent) {
37 $this->parent
= $parent;
40 public function getParentTree() {
41 if (!isset($this->parentTree
)) {
42 if (!is_null($this->parent
)) {
43 $this->parentTree
= $this->parent
->getParentTree();
44 $this->parentTree
[] = $this->parent
;
46 $this->parentTree
= array();
49 return $this->parentTree
;
52 public function getLastCommonParent(Node
$other) {
53 $result = new LastCommonParentResult();
55 $myParents = $this->getParentTree();
56 $otherParents = $other->getParentTree();
60 $nbMyParents = count($myParents);
61 $nbOtherParents = count($otherParents);
62 while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) {
63 if (!$myParents[$i]->openingTag
=== $otherParents[$i]->openingTag
) {
66 // After a while, the index i-1 must be the last common parent
71 $result->lastCommonParentDepth
= $i - 1;
72 $result->parent
= $myParents[$i - 1];
74 if (!$isSame ||
$nbMyParents > $nbOtherParents) {
75 // Not all tags matched, or all tags matched but
76 // there are tags left in this tree
77 $result->indexInLastCommonParent
= $myParents[$i - 1]->getIndexOf($myParents[$i]);
78 $result->splittingNeeded
= true;
79 } else if ($nbMyParents <= $nbOtherParents) {
80 $result->indexInLastCommonParent
= $myParents[$i - 1]->getIndexOf($this);
85 public function setParent($parent) {
86 $this->parent
= $parent;
87 unset($this->parentTree
);
90 public function inPre() {
91 $tree = $this->getParentTree();
92 foreach ($tree as &$ancestor) {
93 if ($ancestor->isPre()) {
102 * Node that can contain other nodes. Represents an HTML tag.
103 * @ingroup DifferenceEngine
105 class TagNode
extends Node
{
107 public $children = array();
111 public $attributes = array();
115 function __construct($parent, $qName, /*array*/ $attributes) {
116 parent
::__construct($parent);
117 $this->qName
= strtolower($qName);
118 foreach($attributes as $key => &$value){
119 $this->attributes
[strtolower($key)] = $value;
121 return $this->openingTag
= Xml
::openElement($this->qName
, $this->attributes
);
124 public function addChildAbsolute(Node
$node, $index) {
125 array_splice($this->children
, $index, 0, array($node));
128 public function getIndexOf(Node
$child) {
129 // don't trust array_search with objects
130 foreach ($this->children
as $key => &$value){
131 if ($value === $child) {
138 public function getNbChildren() {
139 return count($this->children
);
142 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
146 $somethingDeleted = false;
147 $hasNonDeletedDescendant = false;
149 if (empty($this->children
)) {
153 foreach ($this->children
as &$child) {
154 $allDeleted_local = false;
155 $somethingDeleted_local = false;
156 $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local);
157 if ($somethingDeleted_local) {
158 $nodes = array_merge($nodes, $childrenChildren);
159 $somethingDeleted = true;
161 if (!$allDeleted_local) {
162 $hasNonDeletedDescendant = true;
165 if (!$hasNonDeletedDescendant) {
166 $nodes = array($this);
172 public function splitUntil(TagNode
$parent, Node
$split, $includeLeft) {
173 $splitOccured = false;
174 if ($parent !== $this) {
175 $part1 = new TagNode(null, $this->qName
, $this->attributes
);
176 $part2 = new TagNode(null, $this->qName
, $this->attributes
);
177 $part1->setParent($this->parent
);
178 $part2->setParent($this->parent
);
182 foreach ($this->children
as &$child)
184 if ($child === $split) {
187 if(!$pastSplit ||
($onSplit && $includeLeft)) {
188 $child->setParent($part1);
189 $part1->children
[] = $child;
191 $child->setParent($part2);
192 $part2->children
[] = $child;
199 $myindexinparent = $this->parent
->getIndexOf($this);
200 if (!empty($part1->children
)) {
201 $this->parent
->addChildAbsolute($part1, $myindexinparent);
203 if (!empty($part2->children
)) {
204 $this->parent
->addChildAbsolute($part2, $myindexinparent);
206 if (!empty($part1->children
) && !empty($part2->children
)) {
207 $splitOccured = true;
210 $this->parent
->removeChild($myindexinparent);
213 $this->parent
->splitUntil($parent, $part1, $includeLeft);
215 $this->parent
->splitUntil($parent, $part2, $includeLeft);
218 return $splitOccured;
222 private function removeChild($index) {
223 unset($this->children
[$index]);
224 $this->children
= array_values($this->children
);
227 public static $blocks = array('html', 'body','p','blockquote', 'h1',
228 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table',
229 'tbody', 'tr', 'td', 'th', 'br');
231 public function copyTree() {
232 $newThis = new TagNode(null, $this->qName
, $this->attributes
);
233 $newThis->whiteBefore
= $this->whiteBefore
;
234 $newThis->whiteAfter
= $this->whiteAfter
;
235 foreach ($this->children
as &$child) {
236 $newChild = $child->copyTree();
237 $newChild->setParent($newThis);
238 $newThis->children
[] = $newChild;
243 public function getMatchRatio(TagNode
$other) {
244 $txtComp = new TextOnlyComparator($other);
245 return $txtComp->getMatchRatio(new TextOnlyComparator($this));
248 public function expandWhiteSpace() {
252 $nbOriginalChildren = $this->getNbChildren();
253 for ($i = 0; $i < $nbOriginalChildren; ++
$i) {
254 $child = $this->children
[$i +
$shift];
256 if ($child instanceof TagNode
) {
257 if (!$child->isPre()) {
258 $child->expandWhiteSpace();
261 if (!$spaceAdded && $child->whiteBefore
) {
262 $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild());
263 $ws->setParent($this);
264 $this->addChildAbsolute($ws,$i +
($shift++
));
266 if ($child->whiteAfter
) {
267 $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild());
268 $ws->setParent($this);
269 $this->addChildAbsolute($ws,$i +
1 +
($shift++
));
278 public function getLeftMostChild() {
279 if (empty($this->children
)) {
282 return $this->children
[0]->getLeftMostChild();
285 public function getRightMostChild() {
286 if (empty($this->children
)) {
289 return $this->children
[$this->getNbChildren() - 1]->getRightMostChild();
292 public function isPre() {
293 return 0 == strcasecmp($this->qName
,'pre');
296 public static function toDiffLine(TagNode
$node) {
297 return $node->openingTag
;
302 * Represents a piece of text in the HTML file.
303 * @ingroup DifferenceEngine
305 class TextNode
extends Node
{
309 public $modification;
311 function __construct($parent, $text) {
312 parent
::__construct($parent);
313 $this->modification
= new Modification(Modification
::NONE
);
317 public function copyTree() {
318 $clone = clone $this;
319 $clone->setParent(null);
323 public function getLeftMostChild() {
327 public function getRightMostChild() {
331 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
332 if ($this->modification
->type
== Modification
::REMOVED
333 && $this->modification
->id
== $id){
334 $somethingDeleted = true;
341 public function isSameText($other) {
342 if (is_null($other) ||
! $other instanceof TextNode
) {
345 return str_replace('\n', ' ',$this->text
) === str_replace('\n', ' ',$other->text
);
348 public static function toDiffLine(TextNode
$node) {
349 return str_replace('\n', ' ',$node->text
);
355 * @ingroup DifferenceEngine
357 class WhiteSpaceNode
extends TextNode
{
359 function __construct($parent, $s, Node
$like = null) {
360 parent
::__construct($parent, $s);
361 if(!is_null($like) && $like instanceof TextNode
) {
362 $newModification = clone $like->modification
;
363 $newModification->firstOfID
= false;
364 $this->modification
= $newModification;
370 * Represents the root of a HTML document.
371 * @ingroup DifferenceEngine
373 class BodyNode
extends TagNode
{
375 function __construct() {
376 parent
::__construct(null, 'body', array());
379 public function copyTree() {
380 $newThis = new BodyNode();
381 foreach ($this->children
as &$child) {
382 $newChild = $child->copyTree();
383 $newChild->setParent($newThis);
384 $newThis->children
[] = $newChild;
389 public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
391 foreach ($this->children
as &$child) {
392 $childrenChildren = $child->getMinimalDeletedSet($id,
393 $allDeleted, $somethingDeleted);
394 $nodes = array_merge($nodes, $childrenChildren);
402 * Represents an image in HTML. Even though images do not contain any text they
403 * are independent visible objects on the page. They are logically a TextNode.
404 * @ingroup DifferenceEngine
406 class ImageNode
extends TextNode
{
410 function __construct(TagNode
$parent, /*array*/ $attrs) {
411 if(!array_key_exists('src', $attrs)) {
412 HTMLDiffer
::diffDebug( "Image without a source\n" );
413 parent
::__construct($parent, '<img></img>');
415 parent
::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
417 $this->attributes
= $attrs;
420 public function isSameText($other) {
421 if (is_null($other) ||
! $other instanceof ImageNode
) {
424 return $this->text
=== $other->text
;
431 * @ingroup DifferenceEngine
433 class DummyNode
extends Node
{
435 function __construct() {