Merge "Add a 'revdelete-selected-file' message on Special:RevisionDelete"
[lhc/web/wiklou.git] / includes / diff / DairikiDiff.php
1 <?php
2 /**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6 * You may copy this code freely under the conditions of the GPL.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup DifferenceEngine
25 * @defgroup DifferenceEngine DifferenceEngine
26 */
27
28 /**
29 * @todo document
30 * @private
31 * @ingroup DifferenceEngine
32 */
33 abstract class DiffOp {
34
35 /**
36 * @var string
37 */
38 public $type;
39
40 /**
41 * @var string[]
42 */
43 public $orig;
44
45 /**
46 * @var string[]
47 */
48 public $closing;
49
50 /**
51 * @return string
52 */
53 public function getType() {
54 return $this->type;
55 }
56
57 /**
58 * @return string[]
59 */
60 public function getOrig() {
61 return $this->orig;
62 }
63
64 /**
65 * @param int $i
66 * @return string|null
67 */
68 public function getClosing( $i = null ) {
69 if ( $i === null ) {
70 return $this->closing;
71 }
72 if ( array_key_exists( $i, $this->closing ) ) {
73 return $this->closing[$i];
74 }
75 return null;
76 }
77
78 abstract public function reverse();
79
80 /**
81 * @return int
82 */
83 public function norig() {
84 return $this->orig ? count( $this->orig ) : 0;
85 }
86
87 /**
88 * @return int
89 */
90 public function nclosing() {
91 return $this->closing ? count( $this->closing ) : 0;
92 }
93 }
94
95 /**
96 * @todo document
97 * @private
98 * @ingroup DifferenceEngine
99 */
100 class DiffOpCopy extends DiffOp {
101 public $type = 'copy';
102
103 public function __construct( $orig, $closing = false ) {
104 if ( !is_array( $closing ) ) {
105 $closing = $orig;
106 }
107 $this->orig = $orig;
108 $this->closing = $closing;
109 }
110
111 /**
112 * @return DiffOpCopy
113 */
114 public function reverse() {
115 return new DiffOpCopy( $this->closing, $this->orig );
116 }
117 }
118
119 /**
120 * @todo document
121 * @private
122 * @ingroup DifferenceEngine
123 */
124 class DiffOpDelete extends DiffOp {
125 public $type = 'delete';
126
127 public function __construct( $lines ) {
128 $this->orig = $lines;
129 $this->closing = false;
130 }
131
132 /**
133 * @return DiffOpAdd
134 */
135 public function reverse() {
136 return new DiffOpAdd( $this->orig );
137 }
138 }
139
140 /**
141 * @todo document
142 * @private
143 * @ingroup DifferenceEngine
144 */
145 class DiffOpAdd extends DiffOp {
146 public $type = 'add';
147
148 public function __construct( $lines ) {
149 $this->closing = $lines;
150 $this->orig = false;
151 }
152
153 /**
154 * @return DiffOpDelete
155 */
156 public function reverse() {
157 return new DiffOpDelete( $this->closing );
158 }
159 }
160
161 /**
162 * @todo document
163 * @private
164 * @ingroup DifferenceEngine
165 */
166 class DiffOpChange extends DiffOp {
167 public $type = 'change';
168
169 public function __construct( $orig, $closing ) {
170 $this->orig = $orig;
171 $this->closing = $closing;
172 }
173
174 /**
175 * @return DiffOpChange
176 */
177 public function reverse() {
178 return new DiffOpChange( $this->closing, $this->orig );
179 }
180 }
181
182 /**
183 * Class used internally by Diff to actually compute the diffs.
184 *
185 * The algorithm used here is mostly lifted from the perl module
186 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
187 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
188 *
189 * More ideas are taken from:
190 * http://www.ics.uci.edu/~eppstein/161/960229.html
191 *
192 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
193 * diffutils-2.7, which can be found at:
194 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
195 *
196 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
197 * are my own.
198 *
199 * Line length limits for robustness added by Tim Starling, 2005-08-31
200 * Alternative implementation added by Guy Van den Broeck, 2008-07-30
201 *
202 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
203 * @private
204 * @ingroup DifferenceEngine
205 */
206 class DiffEngine {
207 const MAX_XREF_LENGTH = 10000;
208
209 protected $xchanged, $ychanged;
210
211 protected $xv = array(), $yv = array();
212 protected $xind = array(), $yind = array();
213
214 protected $seq = array(), $in_seq = array();
215
216 protected $lcs = 0;
217
218 /**
219 * @param string[] $from_lines
220 * @param string[] $to_lines
221 *
222 * @return DiffOp[]
223 */
224 public function diff( $from_lines, $to_lines ) {
225 wfProfileIn( __METHOD__ );
226
227 // Diff and store locally
228 $this->diffLocal( $from_lines, $to_lines );
229
230 // Merge edits when possible
231 $this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
232 $this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );
233
234 // Compute the edit operations.
235 $n_from = count( $from_lines );
236 $n_to = count( $to_lines );
237
238 $edits = array();
239 $xi = $yi = 0;
240 while ( $xi < $n_from || $yi < $n_to ) {
241 assert( '$yi < $n_to || $this->xchanged[$xi]' );
242 assert( '$xi < $n_from || $this->ychanged[$yi]' );
243
244 // Skip matching "snake".
245 $copy = array();
246 while ( $xi < $n_from && $yi < $n_to
247 && !$this->xchanged[$xi] && !$this->ychanged[$yi]
248 ) {
249 $copy[] = $from_lines[$xi++];
250 ++$yi;
251 }
252 if ( $copy ) {
253 $edits[] = new DiffOpCopy( $copy );
254 }
255
256 // Find deletes & adds.
257 $delete = array();
258 while ( $xi < $n_from && $this->xchanged[$xi] ) {
259 $delete[] = $from_lines[$xi++];
260 }
261
262 $add = array();
263 while ( $yi < $n_to && $this->ychanged[$yi] ) {
264 $add[] = $to_lines[$yi++];
265 }
266
267 if ( $delete && $add ) {
268 $edits[] = new DiffOpChange( $delete, $add );
269 } elseif ( $delete ) {
270 $edits[] = new DiffOpDelete( $delete );
271 } elseif ( $add ) {
272 $edits[] = new DiffOpAdd( $add );
273 }
274 }
275 wfProfileOut( __METHOD__ );
276
277 return $edits;
278 }
279
280 /**
281 * @param string[] $from_lines
282 * @param string[] $to_lines
283 */
284 private function diffLocal( $from_lines, $to_lines ) {
285 global $wgExternalDiffEngine;
286 wfProfileIn( __METHOD__ );
287
288 if ( $wgExternalDiffEngine == 'wikidiff3' ) {
289 // wikidiff3
290 $wikidiff3 = new WikiDiff3();
291 $wikidiff3->diff( $from_lines, $to_lines );
292 $this->xchanged = $wikidiff3->removed;
293 $this->ychanged = $wikidiff3->added;
294 unset( $wikidiff3 );
295 } else {
296 // old diff
297 $n_from = count( $from_lines );
298 $n_to = count( $to_lines );
299 $this->xchanged = $this->ychanged = array();
300 $this->xv = $this->yv = array();
301 $this->xind = $this->yind = array();
302 unset( $this->seq );
303 unset( $this->in_seq );
304 unset( $this->lcs );
305
306 // Skip leading common lines.
307 for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
308 if ( $from_lines[$skip] !== $to_lines[$skip] ) {
309 break;
310 }
311 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
312 }
313 // Skip trailing common lines.
314 $xi = $n_from;
315 $yi = $n_to;
316 for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
317 if ( $from_lines[$xi] !== $to_lines[$yi] ) {
318 break;
319 }
320 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
321 }
322
323 // Ignore lines which do not exist in both files.
324 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
325 $xhash[$this->lineHash( $from_lines[$xi] )] = 1;
326 }
327
328 for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
329 $line = $to_lines[$yi];
330 if ( ( $this->ychanged[$yi] = empty( $xhash[$this->lineHash( $line )] ) ) ) {
331 continue;
332 }
333 $yhash[$this->lineHash( $line )] = 1;
334 $this->yv[] = $line;
335 $this->yind[] = $yi;
336 }
337 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
338 $line = $from_lines[$xi];
339 if ( ( $this->xchanged[$xi] = empty( $yhash[$this->lineHash( $line )] ) ) ) {
340 continue;
341 }
342 $this->xv[] = $line;
343 $this->xind[] = $xi;
344 }
345
346 // Find the LCS.
347 $this->compareSeq( 0, count( $this->xv ), 0, count( $this->yv ) );
348 }
349 wfProfileOut( __METHOD__ );
350 }
351
352 /**
353 * Returns the whole line if it's small enough, or the MD5 hash otherwise
354 *
355 * @param string $line
356 *
357 * @return string
358 */
359 private function lineHash( $line ) {
360 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
361 return md5( $line );
362 } else {
363 return $line;
364 }
365 }
366
367 /**
368 * Divide the Largest Common Subsequence (LCS) of the sequences
369 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
370 * sized segments.
371 *
372 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
373 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
374 * sub sequences. The first sub-sequence is contained in [X0, X1),
375 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
376 * that (X0, Y0) == (XOFF, YOFF) and
377 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
378 *
379 * This function assumes that the first lines of the specified portions
380 * of the two files do not match, and likewise that the last lines do not
381 * match. The caller must trim matching lines from the beginning and end
382 * of the portions it is going to specify.
383 *
384 * @param int $xoff
385 * @param int $xlim
386 * @param int $yoff
387 * @param int $ylim
388 * @param int $nchunks
389 *
390 * @return array List of two elements, integer and array[].
391 */
392 private function diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
393 $flip = false;
394
395 if ( $xlim - $xoff > $ylim - $yoff ) {
396 // Things seems faster (I'm not sure I understand why)
397 // when the shortest sequence in X.
398 $flip = true;
399 list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
400 }
401
402 if ( $flip ) {
403 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
404 $ymatches[$this->xv[$i]][] = $i;
405 }
406 } else {
407 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
408 $ymatches[$this->yv[$i]][] = $i;
409 }
410 }
411
412 $this->lcs = 0;
413 $this->seq[0] = $yoff - 1;
414 $this->in_seq = array();
415 $ymids[0] = array();
416
417 $numer = $xlim - $xoff + $nchunks - 1;
418 $x = $xoff;
419 for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
420 if ( $chunk > 0 ) {
421 for ( $i = 0; $i <= $this->lcs; $i++ ) {
422 $ymids[$i][$chunk - 1] = $this->seq[$i];
423 }
424 }
425
426 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $chunk ) / $nchunks );
427 for ( ; $x < $x1; $x++ ) {
428 $line = $flip ? $this->yv[$x] : $this->xv[$x];
429 if ( empty( $ymatches[$line] ) ) {
430 continue;
431 }
432
433 $k = 0;
434 $matches = $ymatches[$line];
435 reset( $matches );
436 while ( list( , $y ) = each( $matches ) ) {
437 if ( empty( $this->in_seq[$y] ) ) {
438 $k = $this->lcsPos( $y );
439 assert( '$k > 0' );
440 $ymids[$k] = $ymids[$k - 1];
441 break;
442 }
443 }
444
445 while ( list( , $y ) = each( $matches ) ) {
446 if ( $y > $this->seq[$k - 1] ) {
447 assert( '$y < $this->seq[$k]' );
448 // Optimization: this is a common case:
449 // next match is just replacing previous match.
450 $this->in_seq[$this->seq[$k]] = false;
451 $this->seq[$k] = $y;
452 $this->in_seq[$y] = 1;
453 } elseif ( empty( $this->in_seq[$y] ) ) {
454 $k = $this->lcsPos( $y );
455 assert( '$k > 0' );
456 $ymids[$k] = $ymids[$k - 1];
457 }
458 }
459 }
460 }
461
462 $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
463 $ymid = $ymids[$this->lcs];
464 for ( $n = 0; $n < $nchunks - 1; $n++ ) {
465 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
466 $y1 = $ymid[$n] + 1;
467 $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
468 }
469 $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
470
471 return array( $this->lcs, $seps );
472 }
473
474 /**
475 * @param int $ypos
476 *
477 * @return int
478 */
479 private function lcsPos( $ypos ) {
480 $end = $this->lcs;
481 if ( $end == 0 || $ypos > $this->seq[$end] ) {
482 $this->seq[++$this->lcs] = $ypos;
483 $this->in_seq[$ypos] = 1;
484
485 return $this->lcs;
486 }
487
488 $beg = 1;
489 while ( $beg < $end ) {
490 $mid = (int)( ( $beg + $end ) / 2 );
491 if ( $ypos > $this->seq[$mid] ) {
492 $beg = $mid + 1;
493 } else {
494 $end = $mid;
495 }
496 }
497
498 assert( '$ypos != $this->seq[$end]' );
499
500 $this->in_seq[$this->seq[$end]] = false;
501 $this->seq[$end] = $ypos;
502 $this->in_seq[$ypos] = 1;
503
504 return $end;
505 }
506
507 /**
508 * Find LCS of two sequences.
509 *
510 * The results are recorded in the vectors $this->{x,y}changed[], by
511 * storing a 1 in the element for each line that is an insertion
512 * or deletion (ie. is not in the LCS).
513 *
514 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
515 *
516 * Note that XLIM, YLIM are exclusive bounds.
517 * All line numbers are origin-0 and discarded lines are not counted.
518 *
519 * @param int $xoff
520 * @param int $xlim
521 * @param int $yoff
522 * @param int $ylim
523 */
524 private function compareSeq( $xoff, $xlim, $yoff, $ylim ) {
525 // Slide down the bottom initial diagonal.
526 while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff] ) {
527 ++$xoff;
528 ++$yoff;
529 }
530
531 // Slide up the top initial diagonal.
532 while ( $xlim > $xoff && $ylim > $yoff
533 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]
534 ) {
535 --$xlim;
536 --$ylim;
537 }
538
539 if ( $xoff == $xlim || $yoff == $ylim ) {
540 $lcs = 0;
541 } else {
542 // This is ad hoc but seems to work well.
543 // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
544 // $nchunks = max(2,min(8,(int)$nchunks));
545 $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
546 list( $lcs, $seps ) = $this->diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
547 }
548
549 if ( $lcs == 0 ) {
550 // X and Y sequences have no common subsequence:
551 // mark all changed.
552 while ( $yoff < $ylim ) {
553 $this->ychanged[$this->yind[$yoff++]] = 1;
554 }
555 while ( $xoff < $xlim ) {
556 $this->xchanged[$this->xind[$xoff++]] = 1;
557 }
558 } else {
559 // Use the partitions to split this problem into subproblems.
560 reset( $seps );
561 $pt1 = $seps[0];
562 while ( $pt2 = next( $seps ) ) {
563 $this->compareSeq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
564 $pt1 = $pt2;
565 }
566 }
567 }
568
569 /**
570 * Adjust inserts/deletes of identical lines to join changes
571 * as much as possible.
572 *
573 * We do something when a run of changed lines include a
574 * line at one end and has an excluded, identical line at the other.
575 * We are free to choose which identical line is included.
576 * `compareseq' usually chooses the one at the beginning,
577 * but usually it is cleaner to consider the following identical line
578 * to be the "change".
579 *
580 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
581 */
582 private function shiftBoundaries( $lines, &$changed, $other_changed ) {
583 wfProfileIn( __METHOD__ );
584 $i = 0;
585 $j = 0;
586
587 assert( 'count($lines) == count($changed)' );
588 $len = count( $lines );
589 $other_len = count( $other_changed );
590
591 while ( 1 ) {
592 /*
593 * Scan forwards to find beginning of another run of changes.
594 * Also keep track of the corresponding point in the other file.
595 *
596 * Throughout this code, $i and $j are adjusted together so that
597 * the first $i elements of $changed and the first $j elements
598 * of $other_changed both contain the same number of zeros
599 * (unchanged lines).
600 * Furthermore, $j is always kept so that $j == $other_len or
601 * $other_changed[$j] == false.
602 */
603 while ( $j < $other_len && $other_changed[$j] ) {
604 $j++;
605 }
606
607 while ( $i < $len && !$changed[$i] ) {
608 assert( '$j < $other_len && ! $other_changed[$j]' );
609 $i++;
610 $j++;
611 while ( $j < $other_len && $other_changed[$j] ) {
612 $j++;
613 }
614 }
615
616 if ( $i == $len ) {
617 break;
618 }
619
620 $start = $i;
621
622 // Find the end of this run of changes.
623 while ( ++$i < $len && $changed[$i] ) {
624 continue;
625 }
626
627 do {
628 /*
629 * Record the length of this run of changes, so that
630 * we can later determine whether the run has grown.
631 */
632 $runlength = $i - $start;
633
634 /*
635 * Move the changed region back, so long as the
636 * previous unchanged line matches the last changed one.
637 * This merges with previous changed regions.
638 */
639 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
640 $changed[--$start] = 1;
641 $changed[--$i] = false;
642 while ( $start > 0 && $changed[$start - 1] ) {
643 $start--;
644 }
645 assert( '$j > 0' );
646 while ( $other_changed[--$j] ) {
647 continue;
648 }
649 assert( '$j >= 0 && !$other_changed[$j]' );
650 }
651
652 /*
653 * Set CORRESPONDING to the end of the changed run, at the last
654 * point where it corresponds to a changed run in the other file.
655 * CORRESPONDING == LEN means no such point has been found.
656 */
657 $corresponding = $j < $other_len ? $i : $len;
658
659 /*
660 * Move the changed region forward, so long as the
661 * first changed line matches the following unchanged one.
662 * This merges with following changed regions.
663 * Do this second, so that if there are no merges,
664 * the changed region is moved forward as far as possible.
665 */
666 while ( $i < $len && $lines[$start] == $lines[$i] ) {
667 $changed[$start++] = false;
668 $changed[$i++] = 1;
669 while ( $i < $len && $changed[$i] ) {
670 $i++;
671 }
672
673 assert( '$j < $other_len && ! $other_changed[$j]' );
674 $j++;
675 if ( $j < $other_len && $other_changed[$j] ) {
676 $corresponding = $i;
677 while ( $j < $other_len && $other_changed[$j] ) {
678 $j++;
679 }
680 }
681 }
682 } while ( $runlength != $i - $start );
683
684 /*
685 * If possible, move the fully-merged run of changes
686 * back to a corresponding run in the other file.
687 */
688 while ( $corresponding < $i ) {
689 $changed[--$start] = 1;
690 $changed[--$i] = 0;
691 assert( '$j > 0' );
692 while ( $other_changed[--$j] ) {
693 continue;
694 }
695 assert( '$j >= 0 && !$other_changed[$j]' );
696 }
697 }
698 wfProfileOut( __METHOD__ );
699 }
700 }
701
702 /**
703 * Class representing a 'diff' between two sequences of strings.
704 * @todo document
705 * @private
706 * @ingroup DifferenceEngine
707 */
708 class Diff {
709
710 /**
711 * @var DiffOp[]
712 */
713 public $edits;
714
715 /**
716 * Constructor.
717 * Computes diff between sequences of strings.
718 *
719 * @param string[] $from_lines An array of strings.
720 * Typically these are lines from a file.
721 * @param string[] $to_lines An array of strings.
722 */
723 public function __construct( $from_lines, $to_lines ) {
724 $eng = new DiffEngine;
725 $this->edits = $eng->diff( $from_lines, $to_lines );
726 }
727
728 /**
729 * @return DiffOp[]
730 */
731 public function getEdits() {
732 return $this->edits;
733 }
734
735 /**
736 * Compute reversed Diff.
737 *
738 * SYNOPSIS:
739 *
740 * $diff = new Diff($lines1, $lines2);
741 * $rev = $diff->reverse();
742 *
743 * @return Object A Diff object representing the inverse of the
744 * original diff.
745 */
746 public function reverse() {
747 $rev = $this;
748 $rev->edits = array();
749 /** @var DiffOp $edit */
750 foreach ( $this->edits as $edit ) {
751 $rev->edits[] = $edit->reverse();
752 }
753
754 return $rev;
755 }
756
757 /**
758 * Check for empty diff.
759 *
760 * @return bool True if two sequences were identical.
761 */
762 public function isEmpty() {
763 foreach ( $this->edits as $edit ) {
764 if ( $edit->type != 'copy' ) {
765 return false;
766 }
767 }
768
769 return true;
770 }
771
772 /**
773 * Compute the length of the Longest Common Subsequence (LCS).
774 *
775 * This is mostly for diagnostic purposed.
776 *
777 * @return int The length of the LCS.
778 */
779 public function lcs() {
780 $lcs = 0;
781 foreach ( $this->edits as $edit ) {
782 if ( $edit->type == 'copy' ) {
783 $lcs += count( $edit->orig );
784 }
785 }
786
787 return $lcs;
788 }
789
790 /**
791 * Get the original set of lines.
792 *
793 * This reconstructs the $from_lines parameter passed to the
794 * constructor.
795 *
796 * @return string[] The original sequence of strings.
797 */
798 public function orig() {
799 $lines = array();
800
801 foreach ( $this->edits as $edit ) {
802 if ( $edit->orig ) {
803 array_splice( $lines, count( $lines ), 0, $edit->orig );
804 }
805 }
806
807 return $lines;
808 }
809
810 /**
811 * Get the closing set of lines.
812 *
813 * This reconstructs the $to_lines parameter passed to the
814 * constructor.
815 *
816 * @return string[] The sequence of strings.
817 */
818 public function closing() {
819 $lines = array();
820
821 foreach ( $this->edits as $edit ) {
822 if ( $edit->closing ) {
823 array_splice( $lines, count( $lines ), 0, $edit->closing );
824 }
825 }
826
827 return $lines;
828 }
829 }
830
831 /**
832 * @todo document, bad name.
833 * @private
834 * @ingroup DifferenceEngine
835 */
836 class MappedDiff extends Diff {
837 /**
838 * Constructor.
839 *
840 * Computes diff between sequences of strings.
841 *
842 * This can be used to compute things like
843 * case-insensitve diffs, or diffs which ignore
844 * changes in white-space.
845 *
846 * @param string[] $from_lines An array of strings.
847 * Typically these are lines from a file.
848 * @param string[] $to_lines An array of strings.
849 * @param string[] $mapped_from_lines This array should
850 * have the same size number of elements as $from_lines.
851 * The elements in $mapped_from_lines and
852 * $mapped_to_lines are what is actually compared
853 * when computing the diff.
854 * @param string[] $mapped_to_lines This array should
855 * have the same number of elements as $to_lines.
856 */
857 public function __construct( $from_lines, $to_lines,
858 $mapped_from_lines, $mapped_to_lines ) {
859 wfProfileIn( __METHOD__ );
860
861 assert( 'count( $from_lines ) == count( $mapped_from_lines )' );
862 assert( 'count( $to_lines ) == count( $mapped_to_lines )' );
863
864 parent::__construct( $mapped_from_lines, $mapped_to_lines );
865
866 $xi = $yi = 0;
867 $editCount = count( $this->edits );
868 for ( $i = 0; $i < $editCount; $i++ ) {
869 $orig = &$this->edits[$i]->orig;
870 if ( is_array( $orig ) ) {
871 $orig = array_slice( $from_lines, $xi, count( $orig ) );
872 $xi += count( $orig );
873 }
874
875 $closing = &$this->edits[$i]->closing;
876 if ( is_array( $closing ) ) {
877 $closing = array_slice( $to_lines, $yi, count( $closing ) );
878 $yi += count( $closing );
879 }
880 }
881 wfProfileOut( __METHOD__ );
882 }
883 }
884
885 /**
886 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
887 */
888
889 /**
890 * @todo document
891 * @private
892 * @ingroup DifferenceEngine
893 */
894 class HWLDFWordAccumulator {
895 public $insClass = ' class="diffchange diffchange-inline"';
896 public $delClass = ' class="diffchange diffchange-inline"';
897
898 private $lines = array();
899 private $line = '';
900 private $group = '';
901 private $tag = '';
902
903 /**
904 * @param string $new_tag
905 */
906 private function flushGroup( $new_tag ) {
907 if ( $this->group !== '' ) {
908 if ( $this->tag == 'ins' ) {
909 $this->line .= "<ins{$this->insClass}>" .
910 htmlspecialchars( $this->group ) . '</ins>';
911 } elseif ( $this->tag == 'del' ) {
912 $this->line .= "<del{$this->delClass}>" .
913 htmlspecialchars( $this->group ) . '</del>';
914 } else {
915 $this->line .= htmlspecialchars( $this->group );
916 }
917 }
918 $this->group = '';
919 $this->tag = $new_tag;
920 }
921
922 /**
923 * @param string $new_tag
924 */
925 private function flushLine( $new_tag ) {
926 $this->flushGroup( $new_tag );
927 if ( $this->line != '' ) {
928 array_push( $this->lines, $this->line );
929 } else {
930 # make empty lines visible by inserting an NBSP
931 array_push( $this->lines, '&#160;' );
932 }
933 $this->line = '';
934 }
935
936 /**
937 * @param string[] $words
938 * @param string $tag
939 */
940 public function addWords( $words, $tag = '' ) {
941 if ( $tag != $this->tag ) {
942 $this->flushGroup( $tag );
943 }
944
945 foreach ( $words as $word ) {
946 // new-line should only come as first char of word.
947 if ( $word == '' ) {
948 continue;
949 }
950 if ( $word[0] == "\n" ) {
951 $this->flushLine( $tag );
952 $word = substr( $word, 1 );
953 }
954 assert( '!strstr( $word, "\n" )' );
955 $this->group .= $word;
956 }
957 }
958
959 /**
960 * @return string[]
961 */
962 public function getLines() {
963 $this->flushLine( '~done' );
964
965 return $this->lines;
966 }
967 }
968
969 /**
970 * @todo document
971 * @private
972 * @ingroup DifferenceEngine
973 */
974 class WordLevelDiff extends MappedDiff {
975 const MAX_LINE_LENGTH = 10000;
976
977 /**
978 * @param string[] $orig_lines
979 * @param string[] $closing_lines
980 */
981 public function __construct( $orig_lines, $closing_lines ) {
982 wfProfileIn( __METHOD__ );
983
984 list( $orig_words, $orig_stripped ) = $this->split( $orig_lines );
985 list( $closing_words, $closing_stripped ) = $this->split( $closing_lines );
986
987 parent::__construct( $orig_words, $closing_words,
988 $orig_stripped, $closing_stripped );
989 wfProfileOut( __METHOD__ );
990 }
991
992 /**
993 * @param string[] $lines
994 *
995 * @return array[]
996 */
997 private function split( $lines ) {
998 wfProfileIn( __METHOD__ );
999
1000 $words = array();
1001 $stripped = array();
1002 $first = true;
1003 foreach ( $lines as $line ) {
1004 # If the line is too long, just pretend the entire line is one big word
1005 # This prevents resource exhaustion problems
1006 if ( $first ) {
1007 $first = false;
1008 } else {
1009 $words[] = "\n";
1010 $stripped[] = "\n";
1011 }
1012 if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
1013 $words[] = $line;
1014 $stripped[] = $line;
1015 } else {
1016 $m = array();
1017 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1018 $line, $m )
1019 ) {
1020 foreach ( $m[0] as $word ) {
1021 $words[] = $word;
1022 }
1023 foreach ( $m[1] as $stripped_word ) {
1024 $stripped[] = $stripped_word;
1025 }
1026 }
1027 }
1028 }
1029 wfProfileOut( __METHOD__ );
1030
1031 return array( $words, $stripped );
1032 }
1033
1034 /**
1035 * @return string[]
1036 */
1037 public function orig() {
1038 wfProfileIn( __METHOD__ );
1039 $orig = new HWLDFWordAccumulator;
1040
1041 foreach ( $this->edits as $edit ) {
1042 if ( $edit->type == 'copy' ) {
1043 $orig->addWords( $edit->orig );
1044 } elseif ( $edit->orig ) {
1045 $orig->addWords( $edit->orig, 'del' );
1046 }
1047 }
1048 $lines = $orig->getLines();
1049 wfProfileOut( __METHOD__ );
1050
1051 return $lines;
1052 }
1053
1054 /**
1055 * @return string[]
1056 */
1057 public function closing() {
1058 wfProfileIn( __METHOD__ );
1059 $closing = new HWLDFWordAccumulator;
1060
1061 foreach ( $this->edits as $edit ) {
1062 if ( $edit->type == 'copy' ) {
1063 $closing->addWords( $edit->closing );
1064 } elseif ( $edit->closing ) {
1065 $closing->addWords( $edit->closing, 'ins' );
1066 }
1067 }
1068 $lines = $closing->getLines();
1069 wfProfileOut( __METHOD__ );
1070
1071 return $lines;
1072 }
1073
1074 }