Clean up some of the i18n stuff in HTMLDiff. Still not perfect, but better.
[lhc/web/wiklou.git] / includes / diff / HTMLDiff.php
1 <?php
2
3 /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * or see http://www.gnu.org/
19 *
20 * @ingroup DifferenceEngine
21 */
22
/**
 * Plain result record for a "last common parent" lookup between two nodes.
 *
 * Every such lookup reports its outcome through an instance of this class.
 */
class LastCommonParentResult {

	/** The common parent that was found; null until a lookup fills it in. */
	public $parent;

	/** True when a subtree has to be split before something can be inserted. */
	public $splittingNeeded = false;

	/** Depth of the common parent; -1 while unknown. */
	public $lastCommonParentDepth = -1;

	/** Child index within the common parent; -1 while unknown. */
	public $indexInLastCommonParent = -1;
}
41
/**
 * Describes what happened to a leaf node between the old and the new document.
 */
class Modification {

	const NONE = 1;
	const REMOVED = 2;
	const ADDED = 4;
	const CHANGED = 8;

	/** One of the constants above. */
	public $type;

	/** Identifier of the group of modifications this one belongs to; -1 while unassigned. */
	public $id = -1;

	/** True on the first modification carrying a given id. */
	public $firstOfID = false;

	/** Description of the change; only filled in for CHANGED modifications. */
	public $changes;

	/**
	 * @param int $type One of the class constants
	 */
	public function __construct($type) {
		$this->type = $type;
	}

	/**
	 * Map a modification type to its lower-case name.
	 *
	 * @param int $type One of the class constants
	 * @return string|null The name, or null for a value that is not a known type
	 */
	public static function typeToString($type) {
		// Loose comparison on purpose: it mirrors what a switch statement does.
		if ($type == self::NONE) {
			return 'none';
		}
		if ($type == self::REMOVED) {
			return 'removed';
		}
		if ($type == self::ADDED) {
			return 'added';
		}
		if ($type == self::CHANGED) {
			return 'changed';
		}
		return null;
	}
}
70
/**
 * Event handler for PHP's XML parser that builds a node tree below a
 * BodyNode and records every leaf (word, delimiter or image) in document
 * order in $textNodes.
 */
class DomTreeBuilder {

	// Splits character data on single delimiter characters, keeping the delimiters.
	const regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1})/';
	// Matches a piece that is exactly one whitespace character.
	const whitespace = '/^[\s]{1}$/';
	// Matches a piece that is exactly one delimiter character.
	const delimiter = '/^[\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1}$/';

	/** Leaves of the tree, in the order they were encountered. */
	public $textNodes = array();

	/** Root of the tree being built. */
	public $bodyNode;

	/** Node that currently receives new children. */
	private $currentParent;

	/** Characters of the word being accumulated. */
	private $newWord = '';

	protected $bodyStarted = false;

	protected $bodyEnded = false;

	/** Whether whitespace was seen since the last leaf or tag. */
	private $whiteSpaceBeforeThis = false;

	/** Node that gets flagged with whiteAfter when whitespace follows. */
	private $lastSibling;

	/** False while inside a <pre> element, where whitespace is kept as text. */
	private $notInPre = true;

	function __construct() {
		$root = new BodyNode();
		$this->bodyNode = $root;
		$this->currentParent = $root;
		$this->lastSibling = new DummyNode();
	}

	/**
	 * Flush the pending word and log the number of leaves.
	 *
	 * Must be called manually.
	 */
	public function endDocument() {
		$this->endWord();
		HTMLDiffer::diffDebug( count($this->textNodes) . " text nodes in document.\n" );
	}

	/**
	 * Start-tag handler: opens a new TagNode below the current parent.
	 * The body tag itself is ignored because the root already represents it.
	 */
	public function startElement($parser, $name, /*array*/ $attributes) {
		if (strcasecmp($name, 'body') == 0) {
			return;
		}

		HTMLDiffer::diffDebug( "Starting $name node.\n" );
		$this->endWord();

		$tag = new TagNode($this->currentParent, $name, $attributes);
		$this->currentParent->children[] = $tag;
		$this->currentParent = $tag;
		$this->lastSibling = new DummyNode();

		// Only non-block tags remember the whitespace that preceded them.
		if ($this->whiteSpaceBeforeThis && !in_array(strtolower($tag->qName), TagNode::$blocks)) {
			$tag->whiteBefore = true;
		}
		$this->whiteSpaceBeforeThis = false;

		if (strcasecmp($name, 'pre') == 0) {
			$this->notInPre = false;
		}
	}

	/**
	 * End-tag handler: closes the current TagNode; the closing body tag
	 * finishes the document instead.
	 */
	public function endElement($parser, $name) {
		if (strcasecmp($name, 'body') == 0) {
			$this->endDocument();
			return;
		}

		HTMLDiffer::diffDebug( "Ending $name node.\n");

		if (strcasecmp($name, 'img') == 0) {
			// Insert a dummy leaf for the image
			$image = new ImageNode($this->currentParent, $this->currentParent->attributes);
			$this->currentParent->children[] = $image;
			$image->whiteBefore = $this->whiteSpaceBeforeThis;
			$this->lastSibling = $image;
			$this->textNodes[] = $image;
		}

		$this->endWord();

		$closing = $this->currentParent;
		if (in_array(strtolower($closing->qName), TagNode::$blocks)) {
			$this->lastSibling = new DummyNode();
		} else {
			$this->lastSibling = $closing;
		}
		$this->currentParent = $closing->parent;
		$this->whiteSpaceBeforeThis = false;

		if (strcasecmp($name, 'pre') == 0) {
			$this->notInPre = true;
		}
	}

	/**
	 * Character-data handler: cuts the data into words and single-character
	 * delimiters. Whitespace outside <pre> only sets the white-space flags;
	 * inside <pre> it becomes a leaf like any other delimiter.
	 */
	public function characters($parser, $data) {
		$pieces = preg_split(self::regex, $data, -1, PREG_SPLIT_DELIM_CAPTURE);

		foreach ($pieces as $piece) {
			if ($this->notInPre && preg_match(self::whitespace, $piece)) {
				$this->endWord();
				$this->lastSibling->whiteAfter = true;
				$this->whiteSpaceBeforeThis = true;
				continue;
			}
			if (preg_match(self::delimiter, $piece)) {
				$this->endWord();
				$this->appendTextLeaf($piece);
				continue;
			}
			$this->newWord .= $piece;
		}
	}

	/**
	 * Turn the accumulated word, if any, into a leaf.
	 */
	private function endWord() {
		if ($this->newWord === '') {
			return;
		}
		$this->appendTextLeaf($this->newWord);
		$this->newWord = "";
	}

	/**
	 * Append a TextNode below the current parent and register it as a leaf.
	 *
	 * @param string $text
	 */
	private function appendTextLeaf($text) {
		$leaf = new TextNode($this->currentParent, $text);
		$this->currentParent->children[] = $leaf;
		$leaf->whiteBefore = $this->whiteSpaceBeforeThis;
		$this->whiteSpaceBeforeThis = false;
		$this->lastSibling = $leaf;
		$this->textNodes[] = $leaf;
	}

	/**
	 * @return array One diff line per leaf, produced by TextNode::toDiffLine
	 */
	public function getDiffLines() {
		return array_map(array('TextNode','toDiffLine'), $this->textNodes);
	}
}
192
/**
 * Annotates the leaves of the new document with Modification objects and
 * copies the leaves that were deleted from the old document into the new
 * tree, so that a single tree describes the whole diff.
 */
class TextNodeDiffer {

	/** Leaves of the new document. */
	private $textNodes;
	/** Root of the new document; deleted content is grafted into it. */
	public $bodyNode;

	/** Leaves of the old document. */
	private $oldTextNodes;
	private $oldBodyNode;

	private $newID = 0;

	private $changedID = 0;

	private $changedIDUsed = false;

	// used to remove the whitespace between a red and green block
	private $whiteAfterLastChangedPart = false;

	private $deletedID = 0;

	function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
		$this->textNodes = $tree->textNodes;
		$this->bodyNode = $tree->bodyNode;
		$this->oldTextNodes = $oldTree->textNodes;
		$this->oldBodyNode = $oldTree->bodyNode;
	}

	/**
	 * Flag the new leaves in [$start, $end) as added.
	 *
	 * @param int $start First index (inclusive)
	 * @param int $end Last index (exclusive)
	 */
	public function markAsNew($start, $end) {
		if ($end <= $start) {
			return;
		}

		if ($this->whiteAfterLastChangedPart) {
			$this->textNodes[$start]->whiteBefore = false;
		}

		for ($i = $start; $i < $end; ++$i) {
			$mod = new Modification(Modification::ADDED);
			$mod->id = $this->newID;
			$this->textNodes[$i]->modification = $mod;
		}
		// The range is known to be non-empty here.
		$this->textNodes[$start]->modification->firstOfID = true;
		++$this->newID;
	}

	/**
	 * Compare the ancestry of leaves whose text is unchanged and flag those
	 * whose surrounding tags differ as CHANGED.
	 *
	 * @param int $leftstart Start of the range in the old leaves
	 * @param int $leftend End of the range in the old leaves (not read here)
	 * @param int $rightstart Start of the range in the new leaves
	 * @param int $rightend End (exclusive) of the range in the new leaves
	 */
	public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
		$i = $rightstart;
		$j = $leftstart;

		if ($this->changedIDUsed) {
			++$this->changedID;
			$this->changedIDUsed = false;
		}

		// Change description of the previous CHANGED leaf in this run. It is
		// only read while $changedIDUsed is true, which cannot happen before
		// it has been assigned below, so a local variable is sufficient.
		$lastChanges = null;
		while ($i < $rightend) {
			$acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
			$acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
			$result = $acthis->getResult($acother);
			unset($acthis, $acother);

			if ($result->changed) {
				$mod = new Modification(Modification::CHANGED);

				if (!$this->changedIDUsed) {
					$mod->firstOfID = true;
				} else if (!is_null($result->changes) && $result->changes !== $lastChanges) {
					// A different description starts a new group.
					++$this->changedID;
					$mod->firstOfID = true;
				}

				$mod->changes = $result->changes;
				$mod->id = $this->changedID;

				$this->textNodes[$i]->modification = $mod;
				$lastChanges = $result->changes;
				$this->changedIDUsed = true;
			} else if ($this->changedIDUsed) {
				++$this->changedID;
				$this->changedIDUsed = false;
			}
			++$i;
			++$j;
		}
	}

	/**
	 * Flag the old leaves in [$start, $end) as removed and copy them into
	 * the new tree around position $before.
	 *
	 * @param int $start First index (inclusive) in the old leaves
	 * @param int $end Last index (exclusive) in the old leaves
	 * @param int $before Index in the new leaves before which the removed
	 *   content belongs
	 */
	public function markAsDeleted($start, $end, $before) {

		if ($end <= $start) {
			return;
		}

		$this->whiteAfterLastChangedPart =
			($before > 0 && $this->textNodes[$before - 1]->whiteAfter);

		for ($i = $start; $i < $end; ++$i) {
			$mod = new Modification(Modification::REMOVED);
			$mod->id = $this->deletedID;

			// oldTextNodes is used here because we're going to move its deleted
			// elements to this tree!
			$this->oldTextNodes[$i]->modification = $mod;
		}
		$this->oldTextNodes[$start]->modification->firstOfID = true;

		$root = $this->oldTextNodes[$start]->getLastCommonParent($this->oldTextNodes[$end-1])->parent;

		$junk1 = $junk2 = null;
		$deletedNodes = $root->getMinimalDeletedSet($this->deletedID, $junk1, $junk2);

		HTMLDiffer::diffDebug( "Minimal set of deleted nodes of size " . count($deletedNodes) . "\n" );

		// Set prevLeaf to the leaf after which the old HTML needs to be
		// inserted
		if ($before > 0) {
			$prevLeaf = $this->textNodes[$before - 1];
		}
		// Set nextLeaf to the leaf before which the old HTML needs to be
		// inserted
		if ($before < count($this->textNodes)) {
			$nextLeaf = $this->textNodes[$before];
		}

		while (count($deletedNodes) > 0) {
			$firstDeleted = $deletedNodes[0];
			$lastDeleted = $deletedNodes[count($deletedNodes) - 1];

			if (isset($prevLeaf)) {
				$prevResult = $prevLeaf->getLastCommonParent($firstDeleted);
			} else {
				$prevResult = new LastCommonParentResult();
				$prevResult->parent = $this->bodyNode;
				$prevResult->indexInLastCommonParent = 0;
			}
			// This used to test the misspelled $nextleaf, which is never
			// set, so the fallback below was taken unconditionally.
			if (isset($nextLeaf)) {
				$nextResult = $nextLeaf->getLastCommonParent($lastDeleted);
			} else {
				$nextResult = new LastCommonParentResult();
				$nextResult->parent = $this->bodyNode;
				$nextResult->indexInLastCommonParent = $this->bodyNode->getNbChildren();
			}

			if ($prevResult->lastCommonParentDepth == $nextResult->lastCommonParentDepth) {
				// We need some metric to choose which way to add-...
				if ($firstDeleted->parent === $lastDeleted->parent
						&& $prevResult->parent === $nextResult->parent) {
					// The difference is not in the parent
					$prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
				} else {
					// The difference is in the parent, so compare them
					// now THIS is tricky
					$distancePrev = $firstDeleted->parent->getMatchRatio($prevResult->parent);
					$distanceNext = $lastDeleted->parent->getMatchRatio($nextResult->parent);

					if ($distancePrev <= $distanceNext) {
						$prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
					} else {
						$nextResult->lastCommonParentDepth = $nextResult->lastCommonParentDepth + 1;
					}
				}

			}

			if ($prevResult->lastCommonParentDepth > $nextResult->lastCommonParentDepth) {
				// Inserting at the front
				if ($prevResult->splittingNeeded) {
					$prevLeaf->parent->splitUntil($prevResult->parent, $prevLeaf, true);
				}
				// array_shift() removes the first entry and re-indexes the rest.
				$prevLeaf = array_shift($deletedNodes)->copyTree();
				$prevLeaf->setParent($prevResult->parent);
				$prevResult->parent->addChildAbsolute($prevLeaf,$prevResult->indexInLastCommonParent + 1);
			} else if ($prevResult->lastCommonParentDepth < $nextResult->lastCommonParentDepth) {
				// Inserting at the back
				if ($nextResult->splittingNeeded) {
					$splitOccured = $nextLeaf->parent->splitUntil($nextResult->parent, $nextLeaf, false);
					if ($splitOccured) {
						// The place where to insert is shifted one place to the
						// right
						$nextResult->indexInLastCommonParent = $nextResult->indexInLastCommonParent + 1;
					}
				}
				// The original indexed with count(deletedNodes) (no "$"),
				// which did not address the last entry.
				$nextLeaf = array_pop($deletedNodes)->copyTree();
				$nextLeaf->setParent($nextResult->parent);
				$nextResult->parent->addChildAbsolute($nextLeaf,$nextResult->indexInLastCommonParent);
			}
		}
		++$this->deletedID;
	}

	/**
	 * Delegate white-space expansion to the root of the new tree.
	 */
	public function expandWhiteSpace() {
		$this->bodyNode->expandWhiteSpace();
	}

	/**
	 * @return int Number of leaves in the new document
	 */
	public function lengthNew(){
		return count($this->textNodes);
	}

	/**
	 * @return int Number of leaves in the old document
	 */
	public function lengthOld(){
		return count($this->oldTextNodes);
	}
}
399
/**
 * Entry point of the HTML diff: parses both documents, diffs their leaves
 * and hands the annotated tree to an HTMLOutput writing to the given handler.
 */
class HTMLDiffer {

	/** Content handler that receives the rendered diff. */
	private $output;
	/** Debug log accumulated by diffDebug(). */
	private static $debug = '';

	function __construct($output) {
		$this->output = $output;
	}

	/**
	 * Diff two HTML fragments and send the result to the output handler.
	 *
	 * @param string $from Old HTML
	 * @param string $to New HTML
	 */
	function htmlDiff($from, $to) {
		wfProfileIn( __METHOD__ );

		$domfrom = $this->buildDomTree($from);
		unset($from);
		$domto = $this->buildDomTree($to);
		unset($to);

		$diffengine = new WikiDiff3();
		$differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
		unset($diffengine);

		$domdiffer = new TextNodeDiffer($domto, $domfrom);

		// Positions up to which the old (left) and new (right) leaves are handled.
		$doneLeft = 0;
		$doneRight = 0;
		foreach ($differences as $range) {
			if ($range->leftstart > $doneLeft) {
				$domdiffer->handlePossibleChangedPart($doneLeft, $range->leftstart,
					$doneRight, $range->rightstart);
			}
			if ($range->leftlength > 0) {
				$domdiffer->markAsDeleted($range->leftstart, $range->leftend, $range->rightstart);
			}
			$domdiffer->markAsNew($range->rightstart, $range->rightend);

			$doneLeft = $range->leftend;
			$doneRight = $range->rightend;
		}

		// Common tail after the last difference.
		$oldLength = $domdiffer->lengthOld();
		if ($doneLeft < $oldLength) {
			$domdiffer->handlePossibleChangedPart($doneLeft, $oldLength, $doneRight, $domdiffer->lengthNew());
		}

		$domdiffer->expandWhiteSpace();
		$renderer = new HTMLOutput('htmldiff', $this->output);
		$renderer->parse($domdiffer->bodyNode);
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Parse an HTML fragment, wrapped in an XML prologue and a body element,
	 * into a DomTreeBuilder. A parse error is only written to the debug log;
	 * whatever was built up to that point is returned.
	 *
	 * @param string $html
	 * @return DomTreeBuilder
	 */
	private function buildDomTree($html) {
		// Create an XML parser
		$parser = xml_parser_create('');
		$builder = new DomTreeBuilder();

		// Set the functions to handle opening and closing tags
		xml_set_element_handler($parser, array($builder, "startElement"), array($builder, "endElement"));
		// Set the function to handle blocks of character data
		xml_set_character_data_handler($parser, array($builder, "characters"));

		HTMLDiffer::diffDebug( "Parsing " . strlen($html) . " characters worth of HTML\n" );
		$parsed = xml_parse($parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', false)
			&& xml_parse($parser, $html, false)
			&& xml_parse($parser, '</body>', true);
		if (!$parsed) {
			$error = xml_error_string(xml_get_error_code($parser));
			$line = xml_get_current_line_number($parser);
			HTMLDiffer::diffDebug( "XML error: $error at line $line\n" );
		}
		xml_parser_free($parser);

		return $builder;
	}

	/**
	 * Merge neighbouring differences whose score() exceeds the number of
	 * unchanged leaves between them.
	 *
	 * @param array $differences Range differences as returned by diff_range()
	 * @return array List of RangeDifference objects
	 */
	private function preProcess(/*array*/ $differences) {
		$merged = array();
		$total = count($differences);

		$i = 0;
		while ($i < $total) {
			$leftStart = $differences[$i]->leftstart;
			$leftEnd = $differences[$i]->leftend;
			$rightStart = $differences[$i]->rightstart;
			$rightEnd = $differences[$i]->rightend;

			// Absorb following differences for as long as the heuristic allows.
			$next = $i + 1;
			while ($next < $total
					&& self::score($leftEnd - $leftStart,
						$differences[$next]->leftlength,
						$rightEnd - $rightStart,
						$differences[$next]->rightlength)
					> ($differences[$next]->leftstart - $leftEnd)) {
				$leftEnd = $differences[$next]->leftend;
				$rightEnd = $differences[$next]->rightend;
				++$next;
			}

			$merged[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
			$i = $next;
		}
		return $merged;
	}

	/**
	 * Heuristic to merge differences for readability.
	 *
	 * @param int $ll Left length of the current difference
	 * @param int $nll Left length of the next difference
	 * @param int $rl Right length of the current difference
	 * @param int $nrl Right length of the next difference
	 * @return int|float 0 when either side is empty in both differences,
	 *   otherwise the damped sum of the four lengths divided by 6
	 */
	public static function score($ll, $nll, $rl, $nrl) {
		$leftEmpty = ($ll == 0 && $nll == 0);
		$rightEmpty = ($rl == 0 && $nrl == 0);
		if ($leftEmpty || $rightEmpty) {
			return 0;
		}

		$lengths = array($ll, $nll, $rl, $nrl);
		$sum = 0;
		foreach ($lengths as $length) {
			// Everything beyond 3 counts half, applied repeatedly.
			while ($length > 3) {
				$sum += 3;
				$length = ($length - 3) * 0.5;
			}
			$sum += $length;
		}
		return $sum / (1.5 * count($lengths));
	}

	/**
	 * Add to debug output
	 * @param string $str Debug output
	 */
	public static function diffDebug( $str ) {
		self::$debug = self::$debug . $str;
	}

	/**
	 * Get debug output
	 * @return string
	 */
	public static function getDebugOutput() {
		return self::$debug;
	}

}
553
/**
 * Compares two subtrees by looking only at the text leaves they contain.
 */
class TextOnlyComparator {

	/** Diff lines of all TextNode leaves below the tree, in document order. */
	public $leafs = array();

	/**
	 * The constructor used to be misspelled "_construct" and therefore never
	 * ran, leaving $leafs empty for every instance.
	 *
	 * @param TagNode $tree Root of the subtree to collect leaves from
	 */
	function __construct(TagNode $tree) {
		$this->_construct($tree);
	}

	/**
	 * Former (misspelled) initialiser, kept so that code calling it
	 * explicitly keeps working.
	 *
	 * @param TagNode $tree
	 * @deprecated The real constructor does this work now
	 */
	function _construct(TagNode $tree) {
		$this->leafs = array();
		$this->addRecursive($tree);
		$this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
	}

	/**
	 * Depth-first collection of the TextNode leaves below $tree.
	 */
	private function addRecursive(TagNode $tree) {
		foreach ($tree->children as $child) {
			if ($child instanceof TagNode) {
				$this->addRecursive($child);
			} else if ($child instanceof TextNode) {
				// Was "$node", an undefined variable.
				$this->leafs[] = $child;
			}
		}
	}

	/**
	 * Distance between the leaves of this comparator and another one.
	 *
	 * @param TextOnlyComparator $other
	 * @return float 0 when the LCS covers both leaf lists entirely, growing
	 *   towards 1 as less is shared; infinity when either list is empty
	 */
	public function getMatchRatio(TextOnlyComparator $other) {
		$nbOthers = count($other->leafs);
		$nbThis = count($this->leafs);
		if($nbOthers == 0 || $nbThis == 0){
			// log(0) is -infinity, so this evaluates to +infinity.
			return -log(0);
		}

		$diffengine = new WikiDiff3(25000, 1.35);
		$diffengine->diff($this->leafs, $other->leafs);

		$lcsLength = $diffengine->getLcsLength();

		return (2.0 - $lcsLength/$nbOthers - $lcsLength/$nbThis) / 2.0;
	}
}
590
/**
 * Outcome of comparing the ancestry of two nodes.
 */
class AncestorComparatorResult {

	/** True when the two ancestries differ. */
	public $changed = false;

	/** HTML description of the differences; empty when nothing changed. */
	public $changes = "";
}
597
/**
 * Compares the chain of ancestor tags of one node with that of another.
 */
class AncestorComparator {

	/** The ancestor nodes handed to the constructor. */
	public $ancestors;
	/** One diff line per ancestor, produced by TagNode::toDiffLine. */
	public $ancestorsText;

	public $compareTxt = "";

	/**
	 * @param array $ancestors Ancestor nodes of the node under comparison
	 */
	function __construct(/*array*/ $ancestors) {
		$this->ancestors = $ancestors;
		$this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
	}

	/**
	 * Diff the other comparator's ancestry (old side) against this one
	 * (new side).
	 *
	 * @param AncestorComparator $other
	 * @return AncestorComparatorResult Unchanged defaults when the diff is
	 *   empty, otherwise flagged as changed and carrying the description
	 */
	public function getResult(AncestorComparator $other) {
		$outcome = new AncestorComparatorResult();

		$engine = new WikiDiff3(10000, 1.35);
		$ranges = $engine->diff_range($other->ancestorsText,$this->ancestorsText);

		if (count($ranges) != 0) {
			$generator = new ChangeTextGenerator($this, $other);

			$outcome->changed = true;
			$outcome->changes = $generator->getChanged($ranges)->toString();
		}

		return $outcome;
	}
}
630
/**
 * Turns the differences between two ancestries into an HTML description.
 */
class ChangeTextGenerator {

	/** Comparator of the new side. */
	private $ancestorComparator;
	/** Comparator of the old side. */
	private $other;

	private $factory;

	function __construct(AncestorComparator $ancestorComparator, AncestorComparator $other) {
		$this->ancestorComparator = $ancestorComparator;
		$this->other = $other;
		$this->factory = new TagToStringFactory();
	}

	/**
	 * Describe every removed and added ancestor tag.
	 *
	 * Several differences are wrapped in an outer list, and a difference
	 * touching more than one tag gets an inner list of its own.
	 *
	 * @param array $differences Range differences between the two ancestries
	 * @return ChangeText
	 */
	public function getChanged(/*array*/ $differences) {
		$txt = new ChangeText;

		$useOuterList = count($differences) > 1;
		if ($useOuterList) {
			$txt->addHtml('<ul class="changelist">');
		}

		$total = count($differences);
		for ($idx = 0; $idx < $total; ++$idx) {
			$range = $differences[$idx];

			if ($useOuterList) {
				$txt->addHtml('<li>');
			}

			$useInnerList = ($range->leftlength + $range->rightlength > 1);
			if ($useInnerList) {
				$txt->addHtml('<ul class="changelist">');
			}

			// left are the old ones
			for ($pos = $range->leftstart; $pos < $range->leftend; ++$pos) {
				if ($useInnerList) {
					$txt->addHtml('<li>');
				}
				// add a bullet for a old tag
				$this->addTagOld($txt, $this->other->ancestors[$pos]);
				if ($useInnerList) {
					$txt->addHtml('</li>');
				}
			}

			// right are the new ones
			for ($pos = $range->rightstart; $pos < $range->rightend; ++$pos) {
				if ($useInnerList) {
					$txt->addHtml('<li>');
				}
				// add a bullet for a new tag
				$this->addTagNew($txt, $this->ancestorComparator->ancestors[$pos]);
				if ($useInnerList) {
					$txt->addHtml('</li>');
				}
			}

			if ($useInnerList) {
				$txt->addHtml('</ul>');
			}
			if ($useOuterList) {
				$txt->addHtml('</li>');
			}
		}

		if ($useOuterList) {
			$txt->addHtml('</ul>');
		}
		return $txt;
	}

	/**
	 * Append the "removed" description of an old ancestor tag.
	 */
	private function addTagOld(ChangeText $txt, TagNode $ancestor) {
		$describer = $this->factory->create($ancestor);
		$describer->getRemovedDescription($txt);
	}

	/**
	 * Append the "added" description of a new ancestor tag.
	 */
	private function addTagNew(ChangeText $txt, TagNode $ancestor) {
		$describer = $this->factory->create($ancestor);
		$describer->getAddedDescription($txt);
	}
}
706
/**
 * Append-only buffer for the HTML of a change description.
 */
class ChangeText {

	/** Everything added so far. */
	private $txt = "";

	/**
	 * Append a piece of HTML as is.
	 *
	 * @param string $s
	 */
	public function addHtml($s) {
		$this->txt = $this->txt . $s;
	}

	/**
	 * @return string The accumulated HTML
	 */
	public function toString() {
		return $this->txt;
	}
}
719
/**
 * Picks the describer class for a tag and classifies the tag as a
 * container, a style tag or something unknown.
 */
class TagToStringFactory {

	const MOVED = 1;
	const STYLE = 2;
	const UNKNOWN = 4;

	/** Tags whose change is worded as content moving into or out of them. */
	private static $containerTags = array('html', 'body', 'p', 'blockquote',
		'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
		'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
		'dt', 'dd', 'input', 'form', 'img', 'span', 'a');

	/** Tags whose change is worded as a style being added or removed. */
	private static $styleTags = array('i', 'b', 'strong', 'em', 'font',
		'big', 'del', 'tt', 'sub', 'sup', 'strike');

	/**
	 * Create the describer matching the node's tag name.
	 *
	 * @param TagNode $node
	 * @return TagToString AnchorToString for "a", NoContentTagToString for
	 *   "img", a plain TagToString for everything else
	 */
	public function create(TagNode $node) {
		$semantic = $this->getChangeSemantic($node->qName);

		if (strcasecmp($node->qName,'a') == 0) {
			$describer = new AnchorToString($node, $semantic);
		} else if (strcasecmp($node->qName,'img') == 0) {
			$describer = new NoContentTagToString($node, $semantic);
		} else {
			$describer = new TagToString($node, $semantic);
		}
		return $describer;
	}

	/**
	 * Classify a tag name, ignoring case.
	 *
	 * @param string $qname
	 * @return int self::MOVED, self::STYLE or self::UNKNOWN
	 */
	protected function getChangeSemantic($qname) {
		$tag = strtolower($qname);
		if (in_array($tag, self::$containerTags)) {
			return self::MOVED;
		}
		return in_array($tag, self::$styleTags) ? self::STYLE : self::UNKNOWN;
	}
}
755
/**
 * Produces the localised description of a tag being added to or removed
 * from the ancestry of a piece of text.
 */
class TagToString {

	/** The tag being described. */
	protected $node;

	/** One of the TagToStringFactory semantic constants. */
	protected $sem;

	function __construct(TagNode $node, $sem) {
		$this->node = $node;
		$this->sem = $sem;
	}

	/**
	 * Append the description of this tag having been removed.
	 */
	public function getRemovedDescription(ChangeText $txt) {
		$this->describeChange($txt, 'diff-movedoutof', 'diff-styleremoved', 'diff-removed');
	}

	/**
	 * Append the description of this tag having been added.
	 */
	public function getAddedDescription(ChangeText $txt) {
		$this->describeChange($txt, 'diff-movedto', 'diff-styleadded', 'diff-added');
	}

	/**
	 * Shared body of the two public describers: the message chosen by the
	 * tag's semantic, followed by the attribute list and a full stop.
	 *
	 * @param ChangeText $txt
	 * @param string $movedKey Message key for container tags
	 * @param string $styleKey Message key for style tags
	 * @param string $otherKey Message key for every other tag
	 */
	private function describeChange(ChangeText $txt, $movedKey, $styleKey, $otherKey) {
		$tagDescription = $this->getTagDescription();
		if ($this->sem == TagToStringFactory::MOVED) {
			$key = $movedKey;
		} else if ($this->sem == TagToStringFactory::STYLE) {
			$key = $styleKey;
		} else {
			$key = $otherKey;
		}
		$txt->addHtml( wfMsgExt( $key, 'parseinline', $tagDescription ) );
		$this->addAttributes($txt, $this->node->attributes);
		$txt->addHtml('.');
	}

	/**
	 * Localised name of the tag, or the escaped tag itself when no
	 * "diff-<tag>" message exists.
	 *
	 * @return string
	 */
	protected function getTagDescription() {
		$tagDescription = wfMsgExt('diff-' . $this->node->qName, 'parseinline' );
		if( wfEmptyMsg( 'diff-' . $this->node->qName, $tagDescription ) ){
			$tagDescription = "&lt;" . $this->node->qName . "&gt;";
		}
		return $tagDescription;
	}

	/**
	 * Append the attribute list: the first attribute with "diff-with",
	 * middle ones with a comma and "diff-with-additional", and the last of
	 * several with "diff-with-final".
	 *
	 * A single attribute used to produce no output at all: the loop stopped
	 * before the last attribute and the closing message was skipped when
	 * there was only one.
	 *
	 * @param ChangeText $txt
	 * @param array $attributes Attribute name => value
	 */
	protected function addAttributes(ChangeText $txt, array $attributes) {
		$nbAttributes = count($attributes);
		if ($nbAttributes < 1) {
			return;
		}
		$keys = array_keys($attributes);
		$lastIndex = $nbAttributes - 1;

		$txt->addHtml( wfMsgExt('diff-with', 'escapenoentities',
			$this->translateArgument($keys[0]), htmlspecialchars($attributes[$keys[0]]) ) );

		for ($i = 1; $i < $lastIndex; $i++) {
			$key = $keys[$i];
			$txt->addHtml( wfMsgExt( 'comma-separator', 'escapenoentities' ) .
				wfMsgExt( 'diff-with-additional', 'escapenoentities',
				$this->translateArgument( $key ), htmlspecialchars( $attributes[$key] ) )
			);
		}

		if ($lastIndex > 0) {
			$txt->addHtml( wfMsgExt( 'diff-with-final', 'escapenoentities',
				$this->translateArgument($keys[$lastIndex]),
				htmlspecialchars($attributes[$keys[$lastIndex]]) ) );
		}
	}

	/**
	 * Localised name of an attribute, or the bracketed name when no
	 * "diff-<name>" message exists; the result is HTML-escaped.
	 *
	 * @param string $name
	 * @return string
	 */
	protected function translateArgument($name) {
		$translation = wfMsgExt('diff-' . $name, 'parseinline' );
		if ( wfEmptyMsg( 'diff-' . $name, $translation ) ) {
			$translation = "&lt;" . $name . "&gt;";
		}
		return htmlspecialchars( $translation );
	}
}
835
/**
 * Describer for tags without content (used for img): a change is worded as
 * the tag having been changed from / to a given set of attributes.
 */
class NoContentTagToString extends TagToString {

	function __construct(TagNode $node, $sem) {
		parent::__construct($node, $sem);
	}

	/**
	 * Append the "changed to" description of the tag.
	 */
	public function getAddedDescription(ChangeText $txt) {
		$txt->addHtml( wfMsgExt('diff-changedto', 'parseinline', $this->describeTag() ) );
		$this->addAttributes($txt, $this->node->attributes);
		$txt->addHtml('.');
	}

	/**
	 * Append the "changed from" description of the tag.
	 *
	 * This used to pass an undefined $tagDescription to the message; the
	 * description is now looked up as it is for the added case.
	 */
	public function getRemovedDescription(ChangeText $txt) {
		$txt->addHtml( wfMsgExt('diff-changedfrom', 'parseinline', $this->describeTag() ) );
		$this->addAttributes($txt, $this->node->attributes);
		$txt->addHtml('.');
	}

	/**
	 * Localised name of the tag, or the escaped tag itself when no
	 * "diff-<tag>" message exists.
	 *
	 * @return string
	 */
	private function describeTag() {
		$tagDescription = wfMsgExt('diff-' . $this->node->qName, 'parseinline' );
		if( wfEmptyMsg( 'diff-' . $this->node->qName, $tagDescription ) ){
			$tagDescription = "&lt;" . $this->node->qName . "&gt;";
		}
		return $tagDescription;
	}
}
858
/**
 * Describer for anchors: the link target gets its own wording, the
 * remaining attributes are listed the usual way.
 */
class AnchorToString extends TagToString {

	function __construct(TagNode $node, $sem) {
		parent::__construct($node, $sem);
	}

	/**
	 * Append the link destination, then the other attributes.
	 *
	 * The href attribute is matched regardless of case. The XML parser
	 * feeding this code is created without touching its case-folding option,
	 * and PHP enables case folding by default, so attribute names may arrive
	 * upper-cased. An exact "href" key still takes precedence.
	 *
	 * @param ChangeText $txt
	 * @param array $attributes Attribute name => value
	 */
	protected function addAttributes(ChangeText $txt, array $attributes) {
		$hrefKey = null;
		if (array_key_exists('href', $attributes)) {
			$hrefKey = 'href';
		} else {
			foreach (array_keys($attributes) as $key) {
				if (strcasecmp((string)$key, 'href') == 0) {
					$hrefKey = $key;
					break;
				}
			}
		}

		if ($hrefKey !== null) {
			$txt->addHtml(' ' . wfMsgExt( 'diff-withdestination', 'parseinline', htmlspecialchars($attributes[$hrefKey]) ) );
			unset($attributes[$hrefKey]);
		}
		parent::addAttributes($txt, $attributes);
	}
}
873
/**
 * Walks an annotated node tree and emits it through a content handler,
 * wrapping added, changed and removed leaves in marker spans.
 */
class HTMLOutput{

	/** Used in the id attributes of the marker spans. */
	private $prefix;
	/** Content handler receiving the output. */
	private $handler;

	function __construct($prefix, $handler) {
		$this->prefix = $prefix;
		$this->handler = $handler;
	}

	/**
	 * Emit a node and everything below it.
	 *
	 * At most one marker span is open at any time, so a single state
	 * variable tracks which kind it is.
	 *
	 * @param TagNode $node
	 */
	public function parse(TagNode $node) {
		$out = $this->handler;

		// img and body nodes do not get tags of their own here.
		$emitOwnTag = strcasecmp($node->qName, 'img') != 0
			&& strcasecmp($node->qName, 'body') != 0;
		if ($emitOwnTag) {
			$out->startElement($node->qName, $node->attributes);
		}

		// Modification type of the open marker span, or null when none is open.
		$openType = null;
		// Change description of the most recently opened "changed" span.
		$changeTXT = '';

		foreach ($node->children as $child) {
			if ($child instanceof TagNode) {
				// A marker span never crosses a tag boundary.
				if ($openType !== null) {
					$out->endElement('span');
					$openType = null;
				}
				$this->parse($child);
				continue;
			}
			if (!($child instanceof TextNode)) {
				continue;
			}

			$mod = $child->modification;

			// Close the open span when this leaf does not continue it.
			if ($openType !== null) {
				$ends = $mod->type != $openType;
				if (!$ends && $openType === Modification::CHANGED) {
					$ends = $mod->changes != $changeTXT;
				}
				if ($ends || $mod->firstOfID) {
					$out->endElement('span');
					$openType = null;
				}
			}

			// no else because a removed part can just be closed and a new
			// part can start
			if ($openType === null) {
				if ($mod->type == Modification::ADDED) {
					$attrs = array('class' => 'diff-html-added');
					if ($mod->firstOfID) {
						$attrs['id'] = "added-{$this->prefix}-{$mod->id}";
					}
					$out->startElement('span', $attrs);
					$openType = Modification::ADDED;
				} else if ($mod->type == Modification::CHANGED) {
					$attrs = array('class' => 'diff-html-changed');
					if ($mod->firstOfID) {
						$attrs['id'] = "changed-{$this->prefix}-{$mod->id}";
					}
					$out->startElement('span', $attrs);

					//tooltip
					$out->startElement('span', array('class' => 'tip'));
					$out->html($mod->changes);
					$out->endElement('span');

					$openType = Modification::CHANGED;
					$changeTXT = $mod->changes;
				} else if ($mod->type == Modification::REMOVED) {
					$attrs = array('class'=>'diff-html-removed');
					if ($mod->firstOfID) {
						$attrs['id'] = "removed-{$this->prefix}-{$mod->id}";
					}
					$out->startElement('span', $attrs);
					$openType = Modification::REMOVED;
				}
			}

			$chars = $child->text;

			if ($child instanceof ImageNode) {
				$this->writeImage($child);
			} else {
				$out->characters($chars);
			}
		}

		if ($openType !== null) {
			$out->endElement('span');
			$openType = null;
		}

		if ($emitOwnTag) {
			$out->endElement($node->qName);
		}
	}

	/**
	 * Emit an img element carrying the image node's attributes.
	 */
	private function writeImage(ImageNode $imgNode) {
		$this->handler->startElement('img', $imgNode->attributes);
		$this->handler->endElement('img');
	}
}
992
/**
 * Content handler that writes everything straight to standard output.
 */
class EchoingContentHandler {

	/**
	 * Print the opening tag built by Xml::openElement.
	 */
	function startElement($qname, /*array*/ $arguments) {
		$tag = Xml::openElement($qname, $arguments);
		print $tag;
	}

	/**
	 * Print the closing tag built by Xml::closeElement.
	 */
	function endElement($qname){
		$tag = Xml::closeElement($qname);
		print $tag;
	}

	/**
	 * Print text after HTML-escaping it.
	 */
	function characters($chars){
		$escaped = htmlspecialchars($chars);
		print $escaped;
	}

	/**
	 * Print ready-made HTML unchanged.
	 */
	function html($html){
		print $html;
	}

}
1012
/**
 * Content handler that forwards everything, as HTML, to the addHtml()
 * method of another object.
 */
class DelegatingContentHandler {

	/** Receiver of the generated HTML. */
	private $delegate;

	/**
	 * @param object $delegate Anything offering an addHtml() method
	 */
	function __construct($delegate) {
		$this->delegate = $delegate;
	}

	/**
	 * Forward the opening tag built by Xml::openElement.
	 */
	function startElement($qname, /*array*/ $arguments) {
		$this->forward(Xml::openElement($qname, $arguments));
	}

	/**
	 * Forward the closing tag built by Xml::closeElement.
	 */
	function endElement($qname){
		$this->forward(Xml::closeElement($qname));
	}

	/**
	 * Forward text after HTML-escaping it.
	 */
	function characters($chars){
		$this->forward(htmlspecialchars($chars));
	}

	/**
	 * Forward ready-made HTML unchanged.
	 */
	function html($html){
		$this->forward($html);
	}

	/**
	 * Hand a piece of markup to the delegate.
	 */
	private function forward($markup) {
		$this->delegate->addHtml($markup);
	}
}