Fix for bug 20601: disable debug output. It can be re-enabled by patching.
[lhc/web/wiklou.git] / includes / diff / HTMLDiff.php
1 <?php
2
3 /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * or see http://www.gnu.org/
19 *
20 * @ingroup DifferenceEngine
21 */
22
/**
 * Plain value holder describing the outcome of a "last common parent"
 * lookup between two nodes of a document tree.
 */
class LastCommonParentResult {

	/** The common parent that was found (left unset until a lookup fills it in). */
	public $parent;

	/** Set when the tree has to be split before a node can be inserted. */
	public $splittingNeeded = false;

	/** Depth of the common parent; -1 means "not determined". */
	public $lastCommonParentDepth = -1;

	/** Child index inside the common parent; -1 means "not determined". */
	public $indexInLastCommonParent = -1;
}
41
/**
 * Records how a single leaf of the document was affected by the diff.
 */
class Modification {

	// Kinds of modification (distinct bit values).
	const NONE = 1;
	const REMOVED = 2;
	const ADDED = 4;
	const CHANGED = 8;

	/** One of the constants above. */
	public $type;

	/** Identifier shared by all leaves of one modified run; -1 when unset. */
	public $id = -1;

	/** True on the first leaf carrying a given id. */
	public $firstOfID = false;

	/** Description of the change, filled in for CHANGED leaves. */
	public $changes;

	/**
	 * @param int $type One of the Modification constants
	 */
	public function __construct($type) {
		$this->type = $type;
	}

	/**
	 * Translate a modification type into its lowercase name.
	 *
	 * @param int $type One of the Modification constants
	 * @return string|null The name, or null for a value that is not a known type
	 */
	public static function typeToString($type) {
		// Loose comparisons on purpose: they mirror the semantics of a switch.
		if ($type == self::NONE) {
			return 'none';
		}
		if ($type == self::REMOVED) {
			return 'removed';
		}
		if ($type == self::ADDED) {
			return 'added';
		}
		if ($type == self::CHANGED) {
			return 'changed';
		}
		return null;
	}
}
70
/**
 * Handler for PHP's event-based XML parser that turns a stream of element
 * and character events into a tree of TagNode/TextNode/ImageNode objects
 * below a BodyNode, and collects every leaf in document order.
 */
class DomTreeBuilder {

	/** All leaves (text and image nodes) in document order. */
	public $textNodes = array();

	/** Root of the tree that is being built. */
	public $bodyNode;

	/** Node that newly created nodes are appended to. */
	private $currentParent;

	/** Characters of the word collected so far. */
	private $newWord = '';

	protected $bodyStarted = false;

	protected $bodyEnded = false;

	/** True when whitespace was seen right before the current position. */
	private $whiteSpaceBeforeThis = false;

	/** Most recently completed node; receives the whiteAfter flag. */
	private $lastSibling;

	/** False while inside a <pre> element, where whitespace is kept as text. */
	private $notInPre = true;

	public function __construct() {
		$body = new BodyNode();
		$this->currentParent = $body;
		$this->bodyNode = $body;
		$this->lastSibling = new DummyNode();
	}

	/**
	 * Flush the pending word and log the number of leaves.
	 * Must be called manually
	 */
	public function endDocument() {
		$this->endWord();
		HTMLDiffer::diffDebug( count($this->textNodes) . " text nodes in document.\n" );
	}

	/**
	 * Element-start callback: opens a new TagNode below the current parent.
	 * The wrapping <body> element is ignored.
	 *
	 * @param resource $parser XML parser (unused)
	 * @param string $name Element name
	 * @param array $attributes Element attributes
	 */
	public function startElement($parser, $name, /*array*/ $attributes) {
		if (strcasecmp($name, 'body') == 0) {
			return;
		}

		HTMLDiffer::diffDebug( "Starting $name node.\n" );
		$this->endWord();

		$tag = new TagNode($this->currentParent, $name, $attributes);
		$this->currentParent->children[] = $tag;
		$this->currentParent = $tag;
		$this->lastSibling = new DummyNode();

		// Only non-block elements remember the whitespace in front of them.
		if ($this->whiteSpaceBeforeThis && !in_array(strtolower($tag->qName), TagNode::$blocks)) {
			$tag->whiteBefore = true;
		}
		$this->whiteSpaceBeforeThis = false;

		if (strcasecmp($name, 'pre') == 0) {
			$this->notInPre = false;
		}
	}

	/**
	 * Element-end callback: closes the current TagNode. Closing the wrapping
	 * <body> element finishes the document instead.
	 *
	 * @param resource $parser XML parser (unused)
	 * @param string $name Element name
	 */
	public function endElement($parser, $name) {
		if (strcasecmp($name, 'body') == 0) {
			$this->endDocument();
			return;
		}

		HTMLDiffer::diffDebug( "Ending $name node.\n");

		if (strcasecmp($name, 'img') == 0) {
			// Images have no text, so give them a dummy leaf of their own.
			$image = new ImageNode($this->currentParent, $this->currentParent->attributes);
			$this->currentParent->children[] = $image;
			$image->whiteBefore = $this->whiteSpaceBeforeThis;
			$this->lastSibling = $image;
			$this->textNodes[] = $image;
		}

		$this->endWord();

		$closing = $this->currentParent;
		if (in_array(strtolower($closing->qName), TagNode::$blocks)) {
			$this->lastSibling = new DummyNode();
		} else {
			$this->lastSibling = $closing;
		}
		$this->currentParent = $closing->parent;
		$this->whiteSpaceBeforeThis = false;

		if (!$this->notInPre && strcasecmp($name, 'pre') == 0) {
			$this->notInPre = true;
		}
	}

	// Splits character data on single delimiter characters, keeping the delimiters.
	const regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1})/';
	// Matches a piece that is exactly one whitespace character.
	const whitespace = '/^[\s]{1}$/';
	// Matches a piece that is exactly one delimiter character.
	const delimiter = '/^[\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1}$/';

	/**
	 * Character-data callback: cuts the data into words and delimiters and
	 * stores each of them as its own text leaf. Outside <pre>, whitespace is
	 * not stored as a leaf but recorded as flags on the neighbouring nodes.
	 *
	 * @param resource $parser XML parser (unused)
	 * @param string $data Character data
	 */
	public function characters($parser, $data) {
		$pieces = preg_split(self::regex, $data, -1, PREG_SPLIT_DELIM_CAPTURE);

		foreach ($pieces as $piece) {
			if ($this->notInPre && preg_match(self::whitespace, $piece)) {
				$this->endWord();
				$this->lastSibling->whiteAfter = true;
				$this->whiteSpaceBeforeThis = true;
			} else if (preg_match(self::delimiter, $piece)) {
				// A delimiter ends the pending word and is a leaf by itself.
				$this->endWord();
				$this->appendTextLeaf($piece);
			} else {
				$this->newWord .= $piece;
			}
		}
	}

	/**
	 * Store the pending word, if there is one, as a text leaf.
	 */
	private function endWord() {
		if ($this->newWord === '') {
			return;
		}
		$this->appendTextLeaf($this->newWord);
		$this->newWord = "";
	}

	/**
	 * Append a text leaf to the current parent and register it as the most
	 * recent sibling and as the next entry of the leaf list.
	 *
	 * @param string $text Text of the leaf
	 */
	private function appendTextLeaf($text) {
		$leaf = new TextNode($this->currentParent, $text);
		$this->currentParent->children[] = $leaf;
		$leaf->whiteBefore = $this->whiteSpaceBeforeThis;
		$this->whiteSpaceBeforeThis = false;
		$this->lastSibling = $leaf;
		$this->textNodes[] = $leaf;
	}

	/**
	 * @return array The result of TextNode::toDiffLine for every leaf, in order
	 */
	public function getDiffLines() {
		return array_map(array('TextNode','toDiffLine'), $this->textNodes);
	}
}
192
/**
 * Applies the ranges found by the text diff to the tree of the new document:
 * leaves are flagged as added or changed, and removed leaves are copied over
 * from the old tree so that they show up in the output as well.
 */
class TextNodeDiffer {

	/** Leaves of the new document, in document order. */
	private $textNodes;
	/** Root of the new document; the tree the result is built in. */
	public $bodyNode;

	/** Leaves of the old document, in document order. */
	private $oldTextNodes;
	private $oldBodyNode;

	/** Id handed out to the next run of added leaves. */
	private $newID = 0;

	/** Id of the current run of changed leaves. */
	private $changedID = 0;

	/** True once $changedID has been assigned to at least one leaf. */
	private $changedIDUsed = false;

	/** Change description given to the most recent CHANGED leaf. */
	private $changes;

	// used to remove the whitespace between a red and green block
	private $whiteAfterLastChangedPart = false;

	/** Id handed out to the next run of deleted leaves. */
	private $deletedID = 0;

	/**
	 * @param DomTreeBuilder $tree Tree of the new document
	 * @param DomTreeBuilder $oldTree Tree of the old document
	 */
	function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
		$this->textNodes = $tree->textNodes;
		$this->bodyNode = $tree->bodyNode;
		$this->oldTextNodes = $oldTree->textNodes;
		$this->oldBodyNode = $oldTree->bodyNode;
	}

	/**
	 * Flag the new leaves in [$start, $end) as added; they all share one id.
	 *
	 * @param int $start First index in the new leaf list
	 * @param int $end Index one past the last leaf
	 */
	public function markAsNew($start, $end) {
		if ($end <= $start) {
			return;
		}

		if ($this->whiteAfterLastChangedPart) {
			$this->textNodes[$start]->whiteBefore = false;
		}

		for ($i = $start; $i < $end; ++$i) {
			$mod = new Modification(Modification::ADDED);
			$mod->id = $this->newID;
			$this->textNodes[$i]->modification = $mod;
		}
		// The range is known to be non-empty here.
		$this->textNodes[$start]->modification->firstOfID = true;
		++$this->newID;
	}

	/**
	 * Walk through a run of leaves whose text is equal in both documents and
	 * flag those whose ancestors differ as changed. Consecutive leaves with
	 * the same change description share one id.
	 *
	 * @param int $leftstart First index in the old leaf list
	 * @param int $leftend Index one past the last old leaf (not used)
	 * @param int $rightstart First index in the new leaf list
	 * @param int $rightend Index one past the last new leaf
	 */
	public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
		$i = $rightstart;
		$j = $leftstart;

		if ($this->changedIDUsed) {
			++$this->changedID;
			$this->changedIDUsed = false;
		}

		while ($i < $rightend) {
			$acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
			$acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
			$result = $acthis->getResult($acother);
			unset($acthis, $acother);

			if ( $result ) {
				$mod = new Modification(Modification::CHANGED);

				if (!$this->changedIDUsed) {
					$mod->firstOfID = true;
				} else if ($result !== $this->changes) {
					// A different description starts a new run.
					++$this->changedID;
					$mod->firstOfID = true;
				}

				$mod->changes = $result;
				$mod->id = $this->changedID;

				$this->textNodes[$i]->modification = $mod;
				$this->changes = $result;
				$this->changedIDUsed = true;
			} else if ($this->changedIDUsed) {
				// An unchanged leaf ends the current run.
				++$this->changedID;
				$this->changedIDUsed = false;
			}
			++$i;
			++$j;
		}
	}

	/**
	 * Flag the old leaves in [$start, $end) as removed and copy the minimal
	 * set of nodes covering them into the new tree, next to the new leaf at
	 * index $before.
	 *
	 * @param int $start First index in the old leaf list
	 * @param int $end Index one past the last old leaf
	 * @param int $before Index in the new leaf list in front of which the
	 *   removed content belongs
	 */
	public function markAsDeleted($start, $end, $before) {
		if ($end <= $start) {
			return;
		}

		$this->whiteAfterLastChangedPart =
			($before > 0 && $this->textNodes[$before - 1]->whiteAfter);

		for ($i = $start; $i < $end; ++$i) {
			$mod = new Modification(Modification::REMOVED);
			$mod->id = $this->deletedID;

			// oldTextNodes is used here because we're going to move its deleted
			// elements to this tree!
			$this->oldTextNodes[$i]->modification = $mod;
		}
		$this->oldTextNodes[$start]->modification->firstOfID = true;

		$root = $this->oldTextNodes[$start]->getLastCommonParent($this->oldTextNodes[$end - 1])->parent;

		$junk1 = $junk2 = null;
		$deletedNodes = $root->getMinimalDeletedSet($this->deletedID, $junk1, $junk2);

		HTMLDiffer::diffDebug( "Minimal set of deleted nodes of size " . count($deletedNodes) . "\n" );

		// Set prevLeaf to the leaf after which the old HTML needs to be
		// inserted
		$prevLeaf = null;
		if ($before > 0) {
			$prevLeaf = $this->textNodes[$before - 1];
		}
		// Set nextLeaf to the leaf before which the old HTML needs to be
		// inserted
		$nextLeaf = null;
		if ($before < count($this->textNodes)) {
			$nextLeaf = $this->textNodes[$before];
		}

		while (count($deletedNodes) > 0) {
			$firstDeleted = $deletedNodes[0];
			$lastDeleted = $deletedNodes[count($deletedNodes) - 1];

			if (isset($prevLeaf)) {
				$prevResult = $prevLeaf->getLastCommonParent($firstDeleted);
			} else {
				$prevResult = new LastCommonParentResult();
				$prevResult->parent = $this->bodyNode;
				$prevResult->indexInLastCommonParent = -1;
			}
			if (isset($nextLeaf)) {
				$nextResult = $nextLeaf->getLastCommonParent($lastDeleted);
			} else {
				$nextResult = new LastCommonParentResult();
				$nextResult->parent = $this->bodyNode;
				$nextResult->indexInLastCommonParent = $this->bodyNode->getNbChildren();
			}

			if ($prevResult->lastCommonParentDepth == $nextResult->lastCommonParentDepth) {
				// We need some metric to choose which way to add-...
				if ($firstDeleted->parent === $lastDeleted->parent
					&& $prevResult->parent === $nextResult->parent) {
					// The difference is not in the parent
					$prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
				} else {
					// The difference is in the parent, so compare them
					// now THIS is tricky
					$distancePrev = $firstDeleted->parent->getMatchRatio($prevResult->parent);
					$distanceNext = $lastDeleted->parent->getMatchRatio($nextResult->parent);

					if ($distancePrev <= $distanceNext) {
						$prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
					} else {
						$nextResult->lastCommonParentDepth = $nextResult->lastCommonParentDepth + 1;
					}
				}
			}

			if ($prevResult->lastCommonParentDepth > $nextResult->lastCommonParentDepth) {
				// Inserting at the front
				if ($prevResult->splittingNeeded) {
					$prevLeaf->parent->splitUntil($prevResult->parent, $prevLeaf, true);
				}
				// Take the first deleted node off the list; the list is re-indexed.
				$prevLeaf = array_shift($deletedNodes)->copyTree();
				$prevLeaf->setParent($prevResult->parent);
				$prevResult->parent->addChildAbsolute($prevLeaf, $prevResult->indexInLastCommonParent + 1);
			} else if ($prevResult->lastCommonParentDepth < $nextResult->lastCommonParentDepth) {
				// Inserting at the back
				if ($nextResult->splittingNeeded) {
					$splitOccured = $nextLeaf->parent->splitUntil($nextResult->parent, $nextLeaf, false);
					if ($splitOccured) {
						// The place where to insert is shifted one place to the
						// right
						$nextResult->indexInLastCommonParent = $nextResult->indexInLastCommonParent + 1;
					}
				}
				// Take the last deleted node off the list.
				$nextLeaf = array_pop($deletedNodes)->copyTree();
				$nextLeaf->setParent($nextResult->parent);
				$nextResult->parent->addChildAbsolute($nextLeaf, $nextResult->indexInLastCommonParent);
			}
		}
		++$this->deletedID;
	}

	/**
	 * Delegate whitespace expansion to the root of the new tree.
	 */
	public function expandWhiteSpace() {
		$this->bodyNode->expandWhiteSpace();
	}

	/**
	 * @return int Number of leaves in the new document
	 */
	public function lengthNew(){
		return count($this->textNodes);
	}

	/**
	 * @return int Number of leaves in the old document
	 */
	public function lengthOld(){
		return count($this->oldTextNodes);
	}
}
399
/**
 * Entry point of the HTML diff: parses both documents into trees, diffs
 * their leaves and writes the annotated result to the output handler.
 */
class HTMLDiffer {

	/** Handler that receives the resulting document. */
	private $output;
	/** Debug messages collected so far. */
	private static $debug = '';

	/**
	 * @param object $output Handler that is passed on to HTMLOutput
	 */
	function __construct($output) {
		$this->output = $output;
	}

	/**
	 * Diff two HTML fragments and send the annotated new document to the
	 * output handler given to the constructor.
	 *
	 * @param string $from Old HTML
	 * @param string $to New HTML
	 */
	function htmlDiff($from, $to) {
		wfProfileIn( __METHOD__ );

		$domfrom = $this->parseToTree($from);
		unset($from);

		$domto = $this->parseToTree($to);
		unset($to);

		$diffengine = new WikiDiff3();
		$differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
		unset($diffengine);

		$domdiffer = new TextNodeDiffer($domto, $domfrom);

		$currentIndexLeft = 0;
		$currentIndexRight = 0;
		foreach ($differences as $d) {
			if ($d->leftstart > $currentIndexLeft) {
				// Leaves between two differences have equal text, but
				// possibly different ancestors.
				$domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
					$currentIndexRight, $d->rightstart);
			}
			if ($d->leftlength > 0) {
				$domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
			}
			$domdiffer->markAsNew($d->rightstart, $d->rightend);

			$currentIndexLeft = $d->leftend;
			$currentIndexRight = $d->rightend;
		}
		$oldLength = $domdiffer->lengthOld();
		if ($currentIndexLeft < $oldLength) {
			$domdiffer->handlePossibleChangedPart($currentIndexLeft, $oldLength, $currentIndexRight, $domdiffer->lengthNew());
		}
		$domdiffer->expandWhiteSpace();
		$output = new HTMLOutput('htmldiff', $this->output);
		$output->parse($domdiffer->bodyNode);
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Parse an HTML fragment, wrapped in a <body> element, into a tree.
	 * A parse error is only written to the debug log; whatever was built up
	 * to that point is still returned.
	 *
	 * @param string $html HTML fragment
	 * @return DomTreeBuilder Builder holding the tree and its leaves
	 */
	private function parseToTree($html) {
		// Create an XML parser
		$xml_parser = xml_parser_create('');

		$builder = new DomTreeBuilder();

		// Set the functions to handle opening and closing tags
		xml_set_element_handler($xml_parser, array($builder, "startElement"), array($builder, "endElement"));

		// Set the function to handle blocks of character data
		xml_set_character_data_handler($xml_parser, array($builder, "characters"));

		HTMLDiffer::diffDebug( "Parsing " . strlen($html) . " characters worth of HTML\n" );
		if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', false)
			|| !xml_parse($xml_parser, $html, false)
			|| !xml_parse($xml_parser, '</body>', true)){
			$error = xml_error_string(xml_get_error_code($xml_parser));
			$line = xml_get_current_line_number($xml_parser);
			HTMLDiffer::diffDebug( "XML error: $error at line $line\n" );
		}
		xml_parser_free($xml_parser);

		return $builder;
	}

	/**
	 * Merge differences that lie close to each other into one range: a
	 * difference swallows its successor as long as the score of the two is
	 * larger than the number of unchanged old leaves between them.
	 *
	 * @param array $differences Range differences, in order
	 * @return array Merged RangeDifference objects
	 */
	private function preProcess(/*array*/ $differences) {
		$newRanges = array();

		$nbDifferences = count($differences);
		for ($i = 0; $i < $nbDifferences; ++$i) {
			$leftStart = $differences[$i]->leftstart;
			$leftEnd = $differences[$i]->leftend;
			$rightStart = $differences[$i]->rightstart;
			$rightEnd = $differences[$i]->rightend;

			$leftLength = $leftEnd - $leftStart;
			$rightLength = $rightEnd - $rightStart;

			while ($i + 1 < $nbDifferences && self::score($leftLength,
						$differences[$i + 1]->leftlength,
						$rightLength,
						$differences[$i + 1]->rightlength)
					> ($differences[$i + 1]->leftstart - $leftEnd)) {
				$leftEnd = $differences[$i + 1]->leftend;
				$rightEnd = $differences[$i + 1]->rightend;
				$leftLength = $leftEnd - $leftStart;
				$rightLength = $rightEnd - $rightStart;
				++$i;
			}
			$newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
		}
		return $newRanges;
	}

	/**
	 * Heuristic to merge differences for readability.
	 *
	 * Every length contributes its own value up to 3; whatever exceeds 3 is
	 * halved and fed through the same rule again. The sum is divided by 1.5
	 * times the number of lengths.
	 *
	 * @param int $ll Left length of the current difference
	 * @param int $nll Left length of the next difference
	 * @param int $rl Right length of the current difference
	 * @param int $nrl Right length of the next difference
	 * @return int|float The score; 0 when both left or both right lengths are 0
	 */
	public static function score($ll, $nll, $rl, $nrl) {
		if (($ll == 0 && $nll == 0)
			|| ($rl == 0 && $nrl == 0)) {
			return 0;
		}
		$numbers = array($ll, $nll, $rl, $nrl);
		$d = 0;
		foreach ($numbers as $number) {
			while ($number > 3) {
				$d += 3;
				$number -= 3;
				$number *= 0.5;
			}
			$d += $number;
		}
		return $d / (1.5 * count($numbers));
	}

	/**
	 * Add to debug output
	 * @param string $str Debug output
	 */
	public static function diffDebug( $str ) {
		self::$debug .= $str;
	}

	/**
	 * Get debug output
	 * @return string
	 */
	public static function getDebugOutput() {
		return self::$debug;
	}

}
553
/**
 * Compares two subtrees by the text of their leaves only.
 */
class TextOnlyComparator {

	/** Diff lines of all text leaves below the tree, in document order. */
	public $leafs = array();

	/**
	 * Collect the text leaves below $tree and convert them to diff lines.
	 *
	 * @param TagNode $tree Root of the subtree
	 */
	function __construct(TagNode $tree) {
		$this->addRecursive($tree);
		$this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
	}

	/**
	 * Former, misspelled name of the constructor. It was never run by `new`;
	 * it is kept so that explicit calls keep working and rebuilds the leaf
	 * list from scratch.
	 *
	 * @deprecated Construct the object with `new TextOnlyComparator($tree)`
	 * @param TagNode $tree Root of the subtree
	 */
	function _construct(TagNode $tree) {
		$this->leafs = array();
		$this->__construct($tree);
	}

	/**
	 * Append every TextNode found below $tree, depth first.
	 *
	 * @param TagNode $tree Subtree to walk
	 */
	private function addRecursive(TagNode $tree) {
		foreach ($tree->children as $child) {
			if ($child instanceof TagNode) {
				$this->addRecursive($child);
			} else if ($child instanceof TextNode) {
				$this->leafs[] = $child;
			}
		}
	}

	/**
	 * Distance between the text of this subtree and that of another one.
	 *
	 * @param TextOnlyComparator $other Comparator to measure against
	 * @return float 0 when the common subsequence reported by the diff
	 *   engine covers both leaf lists, growing towards 1 as it shrinks;
	 *   INF when either side has no text leaves at all
	 */
	public function getMatchRatio(TextOnlyComparator $other) {
		$nbOthers = count($other->leafs);
		$nbThis = count($this->leafs);
		if ($nbOthers == 0 || $nbThis == 0) {
			// Same value the former -log(0) produced.
			return INF;
		}

		$diffengine = new WikiDiff3(25000, 1.35);
		$diffengine->diff($this->leafs, $other->leafs);

		$lcsLength = $diffengine->getLcsLength();

		return (2.0 - $lcsLength / $nbOthers - $lcsLength / $nbThis) / 2.0;
	}
}
590
/**
 * Compares the ancestor chains of two nodes and describes how they differ.
 */
class AncestorComparator {

	/** The ancestor nodes that were passed to the constructor. */
	public $ancestors;
	/** Result of TagNode::toDiffLine for every ancestor, in the same order. */
	public $ancestorsText;

	public $compareTxt = "";

	/**
	 * @param array $ancestors Ancestor nodes of one leaf
	 */
	function __construct(/*array*/ $ancestors) {
		$this->ancestors = $ancestors;
		$this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
	}

	/**
	 * Describe the difference between another ancestor chain and this one.
	 *
	 * @param AncestorComparator $other Comparator holding the other chain
	 * @return string|null HTML describing the changes, or null when the diff
	 *   engine reports no differences
	 */
	public function getResult(AncestorComparator $other) {
		$engine = new WikiDiff3(10000, 1.35);
		$ranges = $engine->diff_range($other->ancestorsText, $this->ancestorsText);

		if (count($ranges) == 0) {
			return null;
		}

		$generator = new ChangeTextGenerator($this, $other);
		$description = $generator->getChanged($ranges);

		return $description->toString();
	}
}
619
/**
 * Renders the differences between two ancestor chains as HTML text.
 */
class ChangeTextGenerator {

	/** Comparator whose ancestors are reported as added. */
	private $ancestorComparator;
	/** Comparator whose ancestors are reported as removed. */
	private $other;

	/** Creates the describer object for each tag. */
	private $factory;

	function __construct(AncestorComparator $ancestorComparator, AncestorComparator $other) {
		$this->ancestorComparator = $ancestorComparator;
		$this->other = $other;
		$this->factory = new TagToStringFactory();
	}

	/**
	 * Build the description of a list of differences. More than one
	 * difference is wrapped in a list with one item per difference; a
	 * difference that covers more than one tag gets a nested list with one
	 * item per tag.
	 *
	 * @param array $differences Range differences; the left side indexes
	 *   the other chain, the right side indexes this chain
	 * @return ChangeText
	 */
	public function getChanged(/*array*/ $differences) {
		$txt = new ChangeText;

		$total = count($differences);
		$outerList = ($total > 1);
		if ($outerList) {
			$txt->addHtml('<ul class="changelist">');
		}

		for ($n = 0; $n < $total; ++$n) {
			$range = $differences[$n];

			if ($outerList) {
				$txt->addHtml('<li>');
			}

			$innerList = ($range->leftlength + $range->rightlength > 1);
			if ($innerList) {
				$txt->addHtml('<ul class="changelist">');
			}

			// left are the old ones
			for ($k = $range->leftstart; $k < $range->leftend; ++$k) {
				if ($innerList) {
					$txt->addHtml('<li>');
				}
				// add a bullet for a old tag
				$this->addTagOld($txt, $this->other->ancestors[$k]);
				if ($innerList) {
					$txt->addHtml('</li>');
				}
			}

			// right are the new ones
			for ($k = $range->rightstart; $k < $range->rightend; ++$k) {
				if ($innerList) {
					$txt->addHtml('<li>');
				}
				// add a bullet for a new tag
				$this->addTagNew($txt, $this->ancestorComparator->ancestors[$k]);
				if ($innerList) {
					$txt->addHtml('</li>');
				}
			}

			if ($innerList) {
				$txt->addHtml('</ul>');
			}
			if ($outerList) {
				$txt->addHtml('</li>');
			}
		}

		if ($outerList) {
			$txt->addHtml('</ul>');
		}
		return $txt;
	}

	/**
	 * Append the "removed" description of a tag.
	 */
	private function addTagOld(ChangeText $txt, TagNode $ancestor) {
		$describer = $this->factory->create($ancestor);
		$describer->getRemovedDescription($txt);
	}

	/**
	 * Append the "added" description of a tag.
	 */
	private function addTagNew(ChangeText $txt, TagNode $ancestor) {
		$describer = $this->factory->create($ancestor);
		$describer->getAddedDescription($txt);
	}
}
695
/**
 * Minimal buffer that collects HTML fragments.
 */
class ChangeText {

	/** Everything added so far. */
	private $txt = "";

	/**
	 * Append a fragment to the buffer; nothing is escaped.
	 *
	 * @param string $s HTML fragment
	 */
	public function addHtml($s) {
		$this->txt = $this->txt . $s;
	}

	/**
	 * @return string The concatenation of all fragments added so far
	 */
	public function toString() {
		return $this->txt;
	}
}
708
/**
 * Picks the describer class for a tag and determines what kind of change
 * adding or removing that tag stands for.
 */
class TagToStringFactory {

	/** Tags whose addition or removal is reported as content being moved. */
	private static $containerTags = array('html', 'body', 'p', 'blockquote',
		'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
		'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
		'dt', 'dd', 'input', 'form', 'img', 'span', 'a');

	/** Tags whose addition or removal is reported as a style change. */
	private static $styleTags = array('i', 'b', 'strong', 'em', 'font',
		'big', 'del', 'tt', 'sub', 'sup', 'strike');

	const MOVED = 1;
	const STYLE = 2;
	const UNKNOWN = 4;

	/**
	 * Create the describer for a tag: anchors and images get their own
	 * classes, every other tag the generic one.
	 *
	 * @param TagNode $node Tag to describe
	 * @return TagToString
	 */
	public function create(TagNode $node) {
		$tagName = $node->qName;
		$sem = $this->getChangeSemantic($tagName);

		if (strcasecmp($tagName, 'a') == 0) {
			return new AnchorToString($node, $sem);
		}
		if (strcasecmp($tagName, 'img') == 0) {
			return new NoContentTagToString($node, $sem);
		}
		return new TagToString($node, $sem);
	}

	/**
	 * Classify a tag name, ignoring case.
	 *
	 * @param string $qname Tag name
	 * @return int self::MOVED, self::STYLE or self::UNKNOWN
	 */
	protected function getChangeSemantic($qname) {
		$lowered = strtolower($qname);
		if (in_array($lowered, self::$containerTags)) {
			return self::MOVED;
		}
		if (in_array($lowered, self::$styleTags)) {
			return self::STYLE;
		}
		return self::UNKNOWN;
	}
}
744
/**
 * Describes in words that a tag was added to or removed from the ancestors
 * of a piece of text.
 */
class TagToString {

	/** The tag that is described. */
	protected $node;

	/** Change semantic of the tag, one of the TagToStringFactory constants. */
	protected $sem;

	/**
	 * @param TagNode $node Tag to describe
	 * @param int $sem One of the TagToStringFactory constants
	 */
	function __construct(TagNode $node, $sem) {
		$this->node = $node;
		$this->sem = $sem;
	}

	/**
	 * Append the description of the tag having been removed, followed by
	 * its attributes and a full stop.
	 *
	 * @param ChangeText $txt Buffer to append to
	 */
	public function getRemovedDescription(ChangeText $txt) {
		$tagDescription = $this->describeTag();
		if ($this->sem == TagToStringFactory::MOVED) {
			$txt->addHtml( wfMsgExt( 'diff-movedoutof', 'parseinline', $tagDescription ) );
		} else if ($this->sem == TagToStringFactory::STYLE) {
			$txt->addHtml( wfMsgExt( 'diff-styleremoved' , 'parseinline', $tagDescription ) );
		} else {
			$txt->addHtml( wfMsgExt( 'diff-removed' , 'parseinline', $tagDescription ) );
		}
		$this->addAttributes($txt, $this->node->attributes);
		$txt->addHtml('.');
	}

	/**
	 * Append the description of the tag having been added, followed by its
	 * attributes and a full stop.
	 *
	 * @param ChangeText $txt Buffer to append to
	 */
	public function getAddedDescription(ChangeText $txt) {
		$tagDescription = $this->describeTag();
		if ($this->sem == TagToStringFactory::MOVED) {
			$txt->addHtml( wfMsgExt( 'diff-movedto' , 'parseinline', $tagDescription) );
		} else if ($this->sem == TagToStringFactory::STYLE) {
			$txt->addHtml( wfMsgExt( 'diff-styleadded', 'parseinline', $tagDescription ) );
		} else {
			$txt->addHtml( wfMsgExt( 'diff-added', 'parseinline', $tagDescription ) );
		}
		$this->addAttributes($txt, $this->node->attributes);
		$txt->addHtml('.');
	}

	/**
	 * Look up the message 'diff-<tag name>'; when it is empty, fall back to
	 * the tag name between entity-encoded angle brackets.
	 *
	 * @return string
	 */
	private function describeTag() {
		$messageKey = 'diff-' . $this->node->qName;
		$tagDescription = wfMsgExt( $messageKey, 'parseinline' );
		if ( wfEmptyMsg( $messageKey, $tagDescription ) ) {
			$tagDescription = "&lt;" . $this->node->qName . "&gt;";
		}
		return $tagDescription;
	}

	/**
	 * Append the list of attributes: the first one through 'diff-with', the
	 * ones in the middle through 'comma-separator' plus
	 * 'diff-with-additional', and - when there is more than one - the last
	 * one through 'diff-with-final'.
	 *
	 * @param ChangeText $txt Buffer to append to
	 * @param array $attributes Attribute values keyed by attribute name
	 */
	protected function addAttributes(ChangeText $txt, array $attributes) {
		$keys = array_keys($attributes);
		$nbAttributes = count($keys);
		if ($nbAttributes < 1) {
			return;
		}
		$lastIndex = $nbAttributes - 1;

		// The first attribute is always reported, also when it is the only one.
		$txt->addHtml( wfMsgExt( 'diff-with', 'escapenoentities',
			$this->translateArgument( $keys[0] ),
			htmlspecialchars( $attributes[$keys[0]] ) ) );

		for ($i = 1; $i < $lastIndex; $i++) {
			$key = $keys[$i];
			$txt->addHtml( wfMsgExt( 'comma-separator', 'escapenoentities' ) .
				wfMsgExt( 'diff-with-additional', 'escapenoentities',
					$this->translateArgument( $key ),
					htmlspecialchars( $attributes[$key] ) )
			);
		}

		if ($lastIndex > 0) {
			$txt->addHtml( wfMsgExt( 'diff-with-final', 'escapenoentities',
				$this->translateArgument( $keys[$lastIndex] ),
				htmlspecialchars( $attributes[$keys[$lastIndex]] ) ) );
		}
	}

	/**
	 * Look up the message 'diff-<name>' for an attribute name; when it is
	 * empty, fall back to the name between entity-encoded angle brackets.
	 *
	 * @param string $name Attribute name
	 * @return string The description, passed through htmlspecialchars()
	 */
	protected function translateArgument($name) {
		$translation = wfMsgExt('diff-' . $name, 'parseinline' );
		if ( wfEmptyMsg( 'diff-' . $name, $translation ) ) {
			$translation = "&lt;" . $name . "&gt;";
		}
		// NOTE(review): the fallback already contains entities, so this call
		// encodes their ampersands a second time - confirm that is intended.
		return htmlspecialchars( $translation );
	}
}
824
/**
 * Describer for tags without content: the change is worded as "changed to"
 * and "changed from", whatever the change semantic of the tag is.
 */
class NoContentTagToString extends TagToString {

	function __construct(TagNode $node, $sem) {
		parent::__construct($node, $sem);
	}

	/**
	 * Append the "changed to" description, the attributes and a full stop.
	 *
	 * @param ChangeText $txt Buffer to append to
	 */
	public function getAddedDescription(ChangeText $txt) {
		$this->describeChange($txt, 'diff-changedto');
	}

	/**
	 * Append the "changed from" description, the attributes and a full stop.
	 *
	 * @param ChangeText $txt Buffer to append to
	 */
	public function getRemovedDescription(ChangeText $txt) {
		$this->describeChange($txt, 'diff-changedfrom');
	}

	/**
	 * Shared body of both descriptions; only the message key differs.
	 *
	 * @param ChangeText $txt Buffer to append to
	 * @param string $changeMessage Key of the message that wraps the tag description
	 */
	private function describeChange(ChangeText $txt, $changeMessage) {
		$tagMessage = 'diff-' . $this->node->qName;
		$tagDescription = wfMsgExt( $tagMessage, 'parseinline' );
		if ( wfEmptyMsg( $tagMessage, $tagDescription ) ) {
			// No message for this tag: show the tag name itself.
			$tagDescription = "&lt;" . $this->node->qName . "&gt;";
		}
		$txt->addHtml( wfMsgExt( $changeMessage, 'parseinline', $tagDescription ) );
		$this->addAttributes($txt, $this->node->attributes);
		$txt->addHtml('.');
	}
}
851
/**
 * Describer for anchors: the href attribute is reported as the destination
 * of the link, all other attributes in the regular way.
 */
class AnchorToString extends TagToString {

	function __construct(TagNode $node, $sem) {
		parent::__construct($node, $sem);
	}

	/**
	 * Append the link destination, when there is one, and then the
	 * remaining attributes.
	 *
	 * @param ChangeText $txt Buffer to append to
	 * @param array $attributes Attribute values keyed by attribute name
	 */
	protected function addAttributes(ChangeText $txt, array $attributes) {
		if (array_key_exists('href', $attributes)) {
			$destination = htmlspecialchars($attributes['href']);
			$txt->addHtml(' ' . wfMsgExt( 'diff-withdestination', 'parseinline', $destination ) );
			// Keep the parent from reporting href a second time.
			unset($attributes['href']);
		}
		parent::addAttributes($txt, $attributes);
	}
}
866
/**
 * Walks a branch of the annotated tree and feeds it to a content handler,
 * wrapping runs of added, changed and removed leaves in marker spans.
 */
class HTMLOutput{

	/** Text used inside the id attribute of the marker spans. */
	private $prefix;
	/** Content handler that receives the output events. */
	private $handler;

	/**
	 * @param string $prefix Text used inside the ids of the marker spans
	 * @param object $handler Handler offering startElement(), endElement(),
	 *   characters() and html()
	 */
	function __construct($prefix, $handler) {
		$this->prefix = $prefix;
		$this->handler = $handler;
	}

	/**
	 * Output a node and everything below it. No element of their own is
	 * written for the body and for img tags; images are written when their
	 * leaf is reached.
	 *
	 * @param TagNode $node Root of the branch
	 */
	public function parse(TagNode $node) {
		$out = $this->handler;

		$hasOwnElement = strcasecmp($node->qName, 'img') != 0
			&& strcasecmp($node->qName, 'body') != 0;
		if ($hasOwnElement) {
			$out->startElement($node->qName, $node->attributes);
		}

		// Which kind of marker span is currently open.
		$addedOpen = false;
		$removedOpen = false;
		$changedOpen = false;
		$openChangeText = '';

		foreach ($node->children as $child) {
			if ($child instanceof TagNode) {
				// A marker span never reaches across a child element.
				if ($addedOpen) {
					$out->endElement('span');
					$addedOpen = false;
				} else if ($changedOpen) {
					$out->endElement('span');
					$changedOpen = false;
				} else if ($removedOpen) {
					$out->endElement('span');
					$removedOpen = false;
				}
				$this->parse($child);
				continue;
			}
			if (!($child instanceof TextNode)) {
				continue;
			}

			$mod = $child->modification;

			// Close the open span when this leaf does not continue it.
			if ($addedOpen && ($mod->type != Modification::ADDED || $mod->firstOfID)) {
				$out->endElement('span');
				$addedOpen = false;
			} else if ($changedOpen && ($mod->type != Modification::CHANGED
					|| $mod->changes != $openChangeText || $mod->firstOfID)) {
				$out->endElement('span');
				$changedOpen = false;
			} else if ($removedOpen && ($mod->type != Modification::REMOVED || $mod->firstOfID)) {
				$out->endElement('span');
				$removedOpen = false;
			}

			// no else because a removed part can just be closed and a new
			// part can start
			if (!$addedOpen && $mod->type == Modification::ADDED) {
				$this->openMarkerSpan('added', $mod);
				$addedOpen = true;
			} else if (!$changedOpen && $mod->type == Modification::CHANGED) {
				$this->openMarkerSpan('changed', $mod);

				//tooltip
				$out->startElement('span', array('class' => 'tip'));
				$out->html($mod->changes);
				$out->endElement('span');

				$changedOpen = true;
				$openChangeText = $mod->changes;
			} else if (!$removedOpen && $mod->type == Modification::REMOVED) {
				$this->openMarkerSpan('removed', $mod);
				$removedOpen = true;
			}

			$chars = $child->text;

			if ($child instanceof ImageNode) {
				$this->writeImage($child);
			} else {
				$out->characters($chars);
			}
		}

		// Close whatever is still open at the end of the children.
		if ($addedOpen) {
			$out->endElement('span');
			$addedOpen = false;
		} else if ($changedOpen) {
			$out->endElement('span');
			$changedOpen = false;
		} else if ($removedOpen) {
			$out->endElement('span');
			$removedOpen = false;
		}

		if ($hasOwnElement) {
			$out->endElement($node->qName);
		}
	}

	/**
	 * Open a marker span with class "diff-html-<kind>"; the first leaf of a
	 * run also gets the id "<kind>-<prefix>-<modification id>".
	 *
	 * @param string $kind 'added', 'changed' or 'removed'
	 * @param object $mod Modification of the leaf that starts the span
	 */
	private function openMarkerSpan($kind, $mod) {
		$attrs = array('class' => 'diff-html-' . $kind);
		if ($mod->firstOfID) {
			$attrs['id'] = "{$kind}-{$this->prefix}-{$mod->id}";
		}
		$this->handler->startElement('span', $attrs);
	}

	/**
	 * Write an image leaf as an empty img element with its attributes.
	 */
	private function writeImage(ImageNode $imgNode) {
		$this->handler->startElement('img', $imgNode->attributes);
		$this->handler->endElement('img');
	}
}
985
/**
 * Content handler that turns every event into an HTML string and hands it
 * to the addHtml() method of a delegate.
 */
class DelegatingContentHandler {

	/** Object that receives the generated HTML through addHtml(). */
	private $delegate;

	/**
	 * @param object $delegate Object offering an addHtml() method
	 */
	public function __construct($delegate) {
		$this->delegate = $delegate;
	}

	/**
	 * Emit the opening tag built by Xml::openElement().
	 *
	 * @param string $qname Element name
	 * @param array $arguments Element attributes
	 */
	public function startElement($qname, /*array*/ $arguments) {
		$markup = Xml::openElement($qname, $arguments);
		$this->delegate->addHtml($markup);
	}

	/**
	 * Emit the closing tag built by Xml::closeElement().
	 *
	 * @param string $qname Element name
	 */
	public function endElement($qname) {
		$markup = Xml::closeElement($qname);
		$this->delegate->addHtml($markup);
	}

	/**
	 * Emit text after passing it through htmlspecialchars().
	 *
	 * @param string $chars Plain text
	 */
	public function characters($chars) {
		$escaped = htmlspecialchars($chars);
		$this->delegate->addHtml($escaped);
	}

	/**
	 * Emit a piece of HTML unchanged.
	 *
	 * @param string $html HTML fragment
	 */
	public function html($html) {
		$this->delegate->addHtml($html);
	}
}