Merge "Add `makeKey` and `makeGlobalKey` to BagOStuff"
[lhc/web/wiklou.git] / includes / diff / DairikiDiff.php
1 <?php
2 /**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6 * You may copy this code freely under the conditions of the GPL.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup DifferenceEngine
25 * @defgroup DifferenceEngine DifferenceEngine
26 */
27
28 /**
29 * @todo document
30 * @private
31 * @ingroup DifferenceEngine
32 */
33 abstract class DiffOp {
34
35 /**
36 * @var string
37 */
38 public $type;
39
40 /**
41 * @var string[]
42 */
43 public $orig;
44
45 /**
46 * @var string[]
47 */
48 public $closing;
49
50 /**
51 * @return string
52 */
53 public function getType() {
54 return $this->type;
55 }
56
57 /**
58 * @return string[]
59 */
60 public function getOrig() {
61 return $this->orig;
62 }
63
64 /**
65 * @param int $i
66 * @return string|null
67 */
68 public function getClosing( $i = null ) {
69 if ( $i === null ) {
70 return $this->closing;
71 }
72 if ( array_key_exists( $i, $this->closing ) ) {
73 return $this->closing[$i];
74 }
75 return null;
76 }
77
78 abstract public function reverse();
79
80 /**
81 * @return int
82 */
83 public function norig() {
84 return $this->orig ? count( $this->orig ) : 0;
85 }
86
87 /**
88 * @return int
89 */
90 public function nclosing() {
91 return $this->closing ? count( $this->closing ) : 0;
92 }
93 }
94
95 /**
96 * @todo document
97 * @private
98 * @ingroup DifferenceEngine
99 */
100 class DiffOpCopy extends DiffOp {
101 public $type = 'copy';
102
103 public function __construct( $orig, $closing = false ) {
104 if ( !is_array( $closing ) ) {
105 $closing = $orig;
106 }
107 $this->orig = $orig;
108 $this->closing = $closing;
109 }
110
111 /**
112 * @return DiffOpCopy
113 */
114 public function reverse() {
115 return new DiffOpCopy( $this->closing, $this->orig );
116 }
117 }
118
119 /**
120 * @todo document
121 * @private
122 * @ingroup DifferenceEngine
123 */
124 class DiffOpDelete extends DiffOp {
125 public $type = 'delete';
126
127 public function __construct( $lines ) {
128 $this->orig = $lines;
129 $this->closing = false;
130 }
131
132 /**
133 * @return DiffOpAdd
134 */
135 public function reverse() {
136 return new DiffOpAdd( $this->orig );
137 }
138 }
139
140 /**
141 * @todo document
142 * @private
143 * @ingroup DifferenceEngine
144 */
145 class DiffOpAdd extends DiffOp {
146 public $type = 'add';
147
148 public function __construct( $lines ) {
149 $this->closing = $lines;
150 $this->orig = false;
151 }
152
153 /**
154 * @return DiffOpDelete
155 */
156 public function reverse() {
157 return new DiffOpDelete( $this->closing );
158 }
159 }
160
161 /**
162 * @todo document
163 * @private
164 * @ingroup DifferenceEngine
165 */
166 class DiffOpChange extends DiffOp {
167 public $type = 'change';
168
169 public function __construct( $orig, $closing ) {
170 $this->orig = $orig;
171 $this->closing = $closing;
172 }
173
174 /**
175 * @return DiffOpChange
176 */
177 public function reverse() {
178 return new DiffOpChange( $this->closing, $this->orig );
179 }
180 }
181
182 /**
183 * Class used internally by Diff to actually compute the diffs.
184 *
185 * The algorithm used here is mostly lifted from the perl module
186 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
187 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
188 *
189 * More ideas are taken from:
190 * http://www.ics.uci.edu/~eppstein/161/960229.html
191 *
192 * Some ideas (and a bit of code) are from analyze.c, from GNU
193 * diffutils-2.7, which can be found at:
194 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
195 *
196 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
197 * are my own.
198 *
199 * Line length limits for robustness added by Tim Starling, 2005-08-31
200 * Alternative implementation added by Guy Van den Broeck, 2008-07-30
201 *
202 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
203 * @private
204 * @ingroup DifferenceEngine
205 */
206 class DiffEngine {
207 const MAX_XREF_LENGTH = 10000;
208
209 protected $xchanged, $ychanged;
210
211 protected $xv = array(), $yv = array();
212 protected $xind = array(), $yind = array();
213
214 protected $seq = array(), $in_seq = array();
215
216 protected $lcs = 0;
217
218 /**
219 * @param string[] $from_lines
220 * @param string[] $to_lines
221 *
222 * @return DiffOp[]
223 */
224 public function diff( $from_lines, $to_lines ) {
225
226 // Diff and store locally
227 $this->diffLocal( $from_lines, $to_lines );
228
229 // Merge edits when possible
230 $this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
231 $this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );
232
233 // Compute the edit operations.
234 $n_from = count( $from_lines );
235 $n_to = count( $to_lines );
236
237 $edits = array();
238 $xi = $yi = 0;
239 while ( $xi < $n_from || $yi < $n_to ) {
240 assert( '$yi < $n_to || $this->xchanged[$xi]' );
241 assert( '$xi < $n_from || $this->ychanged[$yi]' );
242
243 // Skip matching "snake".
244 $copy = array();
245 while ( $xi < $n_from && $yi < $n_to
246 && !$this->xchanged[$xi] && !$this->ychanged[$yi]
247 ) {
248 $copy[] = $from_lines[$xi++];
249 ++$yi;
250 }
251 if ( $copy ) {
252 $edits[] = new DiffOpCopy( $copy );
253 }
254
255 // Find deletes & adds.
256 $delete = array();
257 while ( $xi < $n_from && $this->xchanged[$xi] ) {
258 $delete[] = $from_lines[$xi++];
259 }
260
261 $add = array();
262 while ( $yi < $n_to && $this->ychanged[$yi] ) {
263 $add[] = $to_lines[$yi++];
264 }
265
266 if ( $delete && $add ) {
267 $edits[] = new DiffOpChange( $delete, $add );
268 } elseif ( $delete ) {
269 $edits[] = new DiffOpDelete( $delete );
270 } elseif ( $add ) {
271 $edits[] = new DiffOpAdd( $add );
272 }
273 }
274
275 return $edits;
276 }
277
278 /**
279 * @param string[] $from_lines
280 * @param string[] $to_lines
281 */
282 private function diffLocal( $from_lines, $to_lines ) {
283 global $wgExternalDiffEngine;
284
285 if ( $wgExternalDiffEngine == 'wikidiff3' ) {
286 // wikidiff3
287 $wikidiff3 = new WikiDiff3();
288 $wikidiff3->diff( $from_lines, $to_lines );
289 $this->xchanged = $wikidiff3->removed;
290 $this->ychanged = $wikidiff3->added;
291 unset( $wikidiff3 );
292 } else {
293 // old diff
294 $n_from = count( $from_lines );
295 $n_to = count( $to_lines );
296 $this->xchanged = $this->ychanged = array();
297 $this->xv = $this->yv = array();
298 $this->xind = $this->yind = array();
299 unset( $this->seq );
300 unset( $this->in_seq );
301 unset( $this->lcs );
302
303 // Skip leading common lines.
304 for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
305 if ( $from_lines[$skip] !== $to_lines[$skip] ) {
306 break;
307 }
308 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
309 }
310 // Skip trailing common lines.
311 $xi = $n_from;
312 $yi = $n_to;
313 for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
314 if ( $from_lines[$xi] !== $to_lines[$yi] ) {
315 break;
316 }
317 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
318 }
319
320 // Ignore lines which do not exist in both files.
321 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
322 $xhash[$this->lineHash( $from_lines[$xi] )] = 1;
323 }
324
325 for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
326 $line = $to_lines[$yi];
327 if ( ( $this->ychanged[$yi] = empty( $xhash[$this->lineHash( $line )] ) ) ) {
328 continue;
329 }
330 $yhash[$this->lineHash( $line )] = 1;
331 $this->yv[] = $line;
332 $this->yind[] = $yi;
333 }
334 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
335 $line = $from_lines[$xi];
336 if ( ( $this->xchanged[$xi] = empty( $yhash[$this->lineHash( $line )] ) ) ) {
337 continue;
338 }
339 $this->xv[] = $line;
340 $this->xind[] = $xi;
341 }
342
343 // Find the LCS.
344 $this->compareSeq( 0, count( $this->xv ), 0, count( $this->yv ) );
345 }
346 }
347
348 /**
349 * Returns the whole line if it's small enough, or the MD5 hash otherwise
350 *
351 * @param string $line
352 *
353 * @return string
354 */
355 private function lineHash( $line ) {
356 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
357 return md5( $line );
358 } else {
359 return $line;
360 }
361 }
362
363 /**
364 * Divide the Largest Common Subsequence (LCS) of the sequences
365 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
366 * sized segments.
367 *
368 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
369 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
370 * sub sequences. The first sub-sequence is contained in [X0, X1),
371 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
372 * that (X0, Y0) == (XOFF, YOFF) and
373 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
374 *
375 * This function assumes that the first lines of the specified portions
376 * of the two files do not match, and likewise that the last lines do not
377 * match. The caller must trim matching lines from the beginning and end
378 * of the portions it is going to specify.
379 *
380 * @param int $xoff
381 * @param int $xlim
382 * @param int $yoff
383 * @param int $ylim
384 * @param int $nchunks
385 *
386 * @return array List of two elements, integer and array[].
387 */
388 private function diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
389 $flip = false;
390
391 if ( $xlim - $xoff > $ylim - $yoff ) {
392 // Things seems faster (I'm not sure I understand why)
393 // when the shortest sequence in X.
394 $flip = true;
395 list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
396 }
397
398 if ( $flip ) {
399 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
400 $ymatches[$this->xv[$i]][] = $i;
401 }
402 } else {
403 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
404 $ymatches[$this->yv[$i]][] = $i;
405 }
406 }
407
408 $this->lcs = 0;
409 $this->seq[0] = $yoff - 1;
410 $this->in_seq = array();
411 $ymids[0] = array();
412
413 $numer = $xlim - $xoff + $nchunks - 1;
414 $x = $xoff;
415 for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
416 if ( $chunk > 0 ) {
417 for ( $i = 0; $i <= $this->lcs; $i++ ) {
418 $ymids[$i][$chunk - 1] = $this->seq[$i];
419 }
420 }
421
422 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $chunk ) / $nchunks );
423 // @codingStandardsIgnoreStart Ignore Squiz.WhiteSpace.SemicolonSpacing.Incorrect
424 for ( ; $x < $x1; $x++ ) {
425 // @codingStandardsIgnoreEnd
426 $line = $flip ? $this->yv[$x] : $this->xv[$x];
427 if ( empty( $ymatches[$line] ) ) {
428 continue;
429 }
430
431 $k = 0;
432 $matches = $ymatches[$line];
433 reset( $matches );
434 while ( list( , $y ) = each( $matches ) ) {
435 if ( empty( $this->in_seq[$y] ) ) {
436 $k = $this->lcsPos( $y );
437 assert( '$k > 0' );
438 $ymids[$k] = $ymids[$k - 1];
439 break;
440 }
441 }
442
443 while ( list( , $y ) = each( $matches ) ) {
444 if ( $y > $this->seq[$k - 1] ) {
445 assert( '$y < $this->seq[$k]' );
446 // Optimization: this is a common case:
447 // next match is just replacing previous match.
448 $this->in_seq[$this->seq[$k]] = false;
449 $this->seq[$k] = $y;
450 $this->in_seq[$y] = 1;
451 } elseif ( empty( $this->in_seq[$y] ) ) {
452 $k = $this->lcsPos( $y );
453 assert( '$k > 0' );
454 $ymids[$k] = $ymids[$k - 1];
455 }
456 }
457 }
458 }
459
460 $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
461 $ymid = $ymids[$this->lcs];
462 for ( $n = 0; $n < $nchunks - 1; $n++ ) {
463 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
464 $y1 = $ymid[$n] + 1;
465 $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
466 }
467 $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
468
469 return array( $this->lcs, $seps );
470 }
471
472 /**
473 * @param int $ypos
474 *
475 * @return int
476 */
477 private function lcsPos( $ypos ) {
478 $end = $this->lcs;
479 if ( $end == 0 || $ypos > $this->seq[$end] ) {
480 $this->seq[++$this->lcs] = $ypos;
481 $this->in_seq[$ypos] = 1;
482
483 return $this->lcs;
484 }
485
486 $beg = 1;
487 while ( $beg < $end ) {
488 $mid = (int)( ( $beg + $end ) / 2 );
489 if ( $ypos > $this->seq[$mid] ) {
490 $beg = $mid + 1;
491 } else {
492 $end = $mid;
493 }
494 }
495
496 assert( '$ypos != $this->seq[$end]' );
497
498 $this->in_seq[$this->seq[$end]] = false;
499 $this->seq[$end] = $ypos;
500 $this->in_seq[$ypos] = 1;
501
502 return $end;
503 }
504
505 /**
506 * Find LCS of two sequences.
507 *
508 * The results are recorded in the vectors $this->{x,y}changed[], by
509 * storing a 1 in the element for each line that is an insertion
510 * or deletion (ie. is not in the LCS).
511 *
512 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
513 *
514 * Note that XLIM, YLIM are exclusive bounds.
515 * All line numbers are origin-0 and discarded lines are not counted.
516 *
517 * @param int $xoff
518 * @param int $xlim
519 * @param int $yoff
520 * @param int $ylim
521 */
522 private function compareSeq( $xoff, $xlim, $yoff, $ylim ) {
523 // Slide down the bottom initial diagonal.
524 while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff] ) {
525 ++$xoff;
526 ++$yoff;
527 }
528
529 // Slide up the top initial diagonal.
530 while ( $xlim > $xoff && $ylim > $yoff
531 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]
532 ) {
533 --$xlim;
534 --$ylim;
535 }
536
537 if ( $xoff == $xlim || $yoff == $ylim ) {
538 $lcs = 0;
539 } else {
540 // This is ad hoc but seems to work well.
541 // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
542 // $nchunks = max(2,min(8,(int)$nchunks));
543 $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
544 list( $lcs, $seps ) = $this->diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
545 }
546
547 if ( $lcs == 0 ) {
548 // X and Y sequences have no common subsequence:
549 // mark all changed.
550 while ( $yoff < $ylim ) {
551 $this->ychanged[$this->yind[$yoff++]] = 1;
552 }
553 while ( $xoff < $xlim ) {
554 $this->xchanged[$this->xind[$xoff++]] = 1;
555 }
556 } else {
557 // Use the partitions to split this problem into subproblems.
558 reset( $seps );
559 $pt1 = $seps[0];
560 while ( $pt2 = next( $seps ) ) {
561 $this->compareSeq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
562 $pt1 = $pt2;
563 }
564 }
565 }
566
567 /**
568 * Adjust inserts/deletes of identical lines to join changes
569 * as much as possible.
570 *
571 * We do something when a run of changed lines include a
572 * line at one end and has an excluded, identical line at the other.
573 * We are free to choose which identical line is included.
574 * `compareseq' usually chooses the one at the beginning,
575 * but usually it is cleaner to consider the following identical line
576 * to be the "change".
577 *
578 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
579 */
580 private function shiftBoundaries( $lines, &$changed, $other_changed ) {
581 $i = 0;
582 $j = 0;
583
584 assert( 'count($lines) == count($changed)' );
585 $len = count( $lines );
586 $other_len = count( $other_changed );
587
588 while ( 1 ) {
589 /*
590 * Scan forwards to find beginning of another run of changes.
591 * Also keep track of the corresponding point in the other file.
592 *
593 * Throughout this code, $i and $j are adjusted together so that
594 * the first $i elements of $changed and the first $j elements
595 * of $other_changed both contain the same number of zeros
596 * (unchanged lines).
597 * Furthermore, $j is always kept so that $j == $other_len or
598 * $other_changed[$j] == false.
599 */
600 while ( $j < $other_len && $other_changed[$j] ) {
601 $j++;
602 }
603
604 while ( $i < $len && !$changed[$i] ) {
605 assert( '$j < $other_len && ! $other_changed[$j]' );
606 $i++;
607 $j++;
608 while ( $j < $other_len && $other_changed[$j] ) {
609 $j++;
610 }
611 }
612
613 if ( $i == $len ) {
614 break;
615 }
616
617 $start = $i;
618
619 // Find the end of this run of changes.
620 while ( ++$i < $len && $changed[$i] ) {
621 continue;
622 }
623
624 do {
625 /*
626 * Record the length of this run of changes, so that
627 * we can later determine whether the run has grown.
628 */
629 $runlength = $i - $start;
630
631 /*
632 * Move the changed region back, so long as the
633 * previous unchanged line matches the last changed one.
634 * This merges with previous changed regions.
635 */
636 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
637 $changed[--$start] = 1;
638 $changed[--$i] = false;
639 while ( $start > 0 && $changed[$start - 1] ) {
640 $start--;
641 }
642 assert( '$j > 0' );
643 while ( $other_changed[--$j] ) {
644 continue;
645 }
646 assert( '$j >= 0 && !$other_changed[$j]' );
647 }
648
649 /*
650 * Set CORRESPONDING to the end of the changed run, at the last
651 * point where it corresponds to a changed run in the other file.
652 * CORRESPONDING == LEN means no such point has been found.
653 */
654 $corresponding = $j < $other_len ? $i : $len;
655
656 /*
657 * Move the changed region forward, so long as the
658 * first changed line matches the following unchanged one.
659 * This merges with following changed regions.
660 * Do this second, so that if there are no merges,
661 * the changed region is moved forward as far as possible.
662 */
663 while ( $i < $len && $lines[$start] == $lines[$i] ) {
664 $changed[$start++] = false;
665 $changed[$i++] = 1;
666 while ( $i < $len && $changed[$i] ) {
667 $i++;
668 }
669
670 assert( '$j < $other_len && ! $other_changed[$j]' );
671 $j++;
672 if ( $j < $other_len && $other_changed[$j] ) {
673 $corresponding = $i;
674 while ( $j < $other_len && $other_changed[$j] ) {
675 $j++;
676 }
677 }
678 }
679 } while ( $runlength != $i - $start );
680
681 /*
682 * If possible, move the fully-merged run of changes
683 * back to a corresponding run in the other file.
684 */
685 while ( $corresponding < $i ) {
686 $changed[--$start] = 1;
687 $changed[--$i] = 0;
688 assert( '$j > 0' );
689 while ( $other_changed[--$j] ) {
690 continue;
691 }
692 assert( '$j >= 0 && !$other_changed[$j]' );
693 }
694 }
695 }
696 }
697
698 /**
699 * Class representing a 'diff' between two sequences of strings.
700 * @todo document
701 * @private
702 * @ingroup DifferenceEngine
703 */
704 class Diff {
705
706 /**
707 * @var DiffOp[]
708 */
709 public $edits;
710
711 /**
712 * Constructor.
713 * Computes diff between sequences of strings.
714 *
715 * @param string[] $from_lines An array of strings.
716 * Typically these are lines from a file.
717 * @param string[] $to_lines An array of strings.
718 */
719 public function __construct( $from_lines, $to_lines ) {
720 $eng = new DiffEngine;
721 $this->edits = $eng->diff( $from_lines, $to_lines );
722 }
723
724 /**
725 * @return DiffOp[]
726 */
727 public function getEdits() {
728 return $this->edits;
729 }
730
731 /**
732 * Compute reversed Diff.
733 *
734 * SYNOPSIS:
735 *
736 * $diff = new Diff($lines1, $lines2);
737 * $rev = $diff->reverse();
738 *
739 * @return Object A Diff object representing the inverse of the
740 * original diff.
741 */
742 public function reverse() {
743 $rev = $this;
744 $rev->edits = array();
745 /** @var DiffOp $edit */
746 foreach ( $this->edits as $edit ) {
747 $rev->edits[] = $edit->reverse();
748 }
749
750 return $rev;
751 }
752
753 /**
754 * Check for empty diff.
755 *
756 * @return bool True if two sequences were identical.
757 */
758 public function isEmpty() {
759 foreach ( $this->edits as $edit ) {
760 if ( $edit->type != 'copy' ) {
761 return false;
762 }
763 }
764
765 return true;
766 }
767
768 /**
769 * Compute the length of the Longest Common Subsequence (LCS).
770 *
771 * This is mostly for diagnostic purposed.
772 *
773 * @return int The length of the LCS.
774 */
775 public function lcs() {
776 $lcs = 0;
777 foreach ( $this->edits as $edit ) {
778 if ( $edit->type == 'copy' ) {
779 $lcs += count( $edit->orig );
780 }
781 }
782
783 return $lcs;
784 }
785
786 /**
787 * Get the original set of lines.
788 *
789 * This reconstructs the $from_lines parameter passed to the
790 * constructor.
791 *
792 * @return string[] The original sequence of strings.
793 */
794 public function orig() {
795 $lines = array();
796
797 foreach ( $this->edits as $edit ) {
798 if ( $edit->orig ) {
799 array_splice( $lines, count( $lines ), 0, $edit->orig );
800 }
801 }
802
803 return $lines;
804 }
805
806 /**
807 * Get the closing set of lines.
808 *
809 * This reconstructs the $to_lines parameter passed to the
810 * constructor.
811 *
812 * @return string[] The sequence of strings.
813 */
814 public function closing() {
815 $lines = array();
816
817 foreach ( $this->edits as $edit ) {
818 if ( $edit->closing ) {
819 array_splice( $lines, count( $lines ), 0, $edit->closing );
820 }
821 }
822
823 return $lines;
824 }
825 }
826
827 /**
828 * @todo document, bad name.
829 * @private
830 * @ingroup DifferenceEngine
831 */
832 class MappedDiff extends Diff {
833 /**
834 * Constructor.
835 *
836 * Computes diff between sequences of strings.
837 *
838 * This can be used to compute things like
839 * case-insensitve diffs, or diffs which ignore
840 * changes in white-space.
841 *
842 * @param string[] $from_lines An array of strings.
843 * Typically these are lines from a file.
844 * @param string[] $to_lines An array of strings.
845 * @param string[] $mapped_from_lines This array should
846 * have the same size number of elements as $from_lines.
847 * The elements in $mapped_from_lines and
848 * $mapped_to_lines are what is actually compared
849 * when computing the diff.
850 * @param string[] $mapped_to_lines This array should
851 * have the same number of elements as $to_lines.
852 */
853 public function __construct( $from_lines, $to_lines,
854 $mapped_from_lines, $mapped_to_lines ) {
855
856 assert( 'count( $from_lines ) == count( $mapped_from_lines )' );
857 assert( 'count( $to_lines ) == count( $mapped_to_lines )' );
858
859 parent::__construct( $mapped_from_lines, $mapped_to_lines );
860
861 $xi = $yi = 0;
862 $editCount = count( $this->edits );
863 for ( $i = 0; $i < $editCount; $i++ ) {
864 $orig = &$this->edits[$i]->orig;
865 if ( is_array( $orig ) ) {
866 $orig = array_slice( $from_lines, $xi, count( $orig ) );
867 $xi += count( $orig );
868 }
869
870 $closing = &$this->edits[$i]->closing;
871 if ( is_array( $closing ) ) {
872 $closing = array_slice( $to_lines, $yi, count( $closing ) );
873 $yi += count( $closing );
874 }
875 }
876 }
877 }
878
879 /**
880 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
881 */
882
883 /**
884 * @todo document
885 * @private
886 * @ingroup DifferenceEngine
887 */
888 class HWLDFWordAccumulator {
889 public $insClass = ' class="diffchange diffchange-inline"';
890 public $delClass = ' class="diffchange diffchange-inline"';
891
892 private $lines = array();
893 private $line = '';
894 private $group = '';
895 private $tag = '';
896
897 /**
898 * @param string $new_tag
899 */
900 private function flushGroup( $new_tag ) {
901 if ( $this->group !== '' ) {
902 if ( $this->tag == 'ins' ) {
903 $this->line .= "<ins{$this->insClass}>" .
904 htmlspecialchars( $this->group ) . '</ins>';
905 } elseif ( $this->tag == 'del' ) {
906 $this->line .= "<del{$this->delClass}>" .
907 htmlspecialchars( $this->group ) . '</del>';
908 } else {
909 $this->line .= htmlspecialchars( $this->group );
910 }
911 }
912 $this->group = '';
913 $this->tag = $new_tag;
914 }
915
916 /**
917 * @param string $new_tag
918 */
919 private function flushLine( $new_tag ) {
920 $this->flushGroup( $new_tag );
921 if ( $this->line != '' ) {
922 array_push( $this->lines, $this->line );
923 } else {
924 # make empty lines visible by inserting an NBSP
925 array_push( $this->lines, '&#160;' );
926 }
927 $this->line = '';
928 }
929
930 /**
931 * @param string[] $words
932 * @param string $tag
933 */
934 public function addWords( $words, $tag = '' ) {
935 if ( $tag != $this->tag ) {
936 $this->flushGroup( $tag );
937 }
938
939 foreach ( $words as $word ) {
940 // new-line should only come as first char of word.
941 if ( $word == '' ) {
942 continue;
943 }
944 if ( $word[0] == "\n" ) {
945 $this->flushLine( $tag );
946 $word = substr( $word, 1 );
947 }
948 assert( '!strstr( $word, "\n" )' );
949 $this->group .= $word;
950 }
951 }
952
953 /**
954 * @return string[]
955 */
956 public function getLines() {
957 $this->flushLine( '~done' );
958
959 return $this->lines;
960 }
961 }
962
963 /**
964 * @todo document
965 * @private
966 * @ingroup DifferenceEngine
967 */
968 class WordLevelDiff extends MappedDiff {
969 const MAX_LINE_LENGTH = 10000;
970
971 /**
972 * @param string[] $orig_lines
973 * @param string[] $closing_lines
974 */
975 public function __construct( $orig_lines, $closing_lines ) {
976
977 list( $orig_words, $orig_stripped ) = $this->split( $orig_lines );
978 list( $closing_words, $closing_stripped ) = $this->split( $closing_lines );
979
980 parent::__construct( $orig_words, $closing_words,
981 $orig_stripped, $closing_stripped );
982 }
983
984 /**
985 * @param string[] $lines
986 *
987 * @return array[]
988 */
989 private function split( $lines ) {
990
991 $words = array();
992 $stripped = array();
993 $first = true;
994 foreach ( $lines as $line ) {
995 # If the line is too long, just pretend the entire line is one big word
996 # This prevents resource exhaustion problems
997 if ( $first ) {
998 $first = false;
999 } else {
1000 $words[] = "\n";
1001 $stripped[] = "\n";
1002 }
1003 if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
1004 $words[] = $line;
1005 $stripped[] = $line;
1006 } else {
1007 $m = array();
1008 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1009 $line, $m )
1010 ) {
1011 foreach ( $m[0] as $word ) {
1012 $words[] = $word;
1013 }
1014 foreach ( $m[1] as $stripped_word ) {
1015 $stripped[] = $stripped_word;
1016 }
1017 }
1018 }
1019 }
1020
1021 return array( $words, $stripped );
1022 }
1023
1024 /**
1025 * @return string[]
1026 */
1027 public function orig() {
1028 $orig = new HWLDFWordAccumulator;
1029
1030 foreach ( $this->edits as $edit ) {
1031 if ( $edit->type == 'copy' ) {
1032 $orig->addWords( $edit->orig );
1033 } elseif ( $edit->orig ) {
1034 $orig->addWords( $edit->orig, 'del' );
1035 }
1036 }
1037 $lines = $orig->getLines();
1038
1039 return $lines;
1040 }
1041
1042 /**
1043 * @return string[]
1044 */
1045 public function closing() {
1046 $closing = new HWLDFWordAccumulator;
1047
1048 foreach ( $this->edits as $edit ) {
1049 if ( $edit->type == 'copy' ) {
1050 $closing->addWords( $edit->closing );
1051 } elseif ( $edit->closing ) {
1052 $closing->addWords( $edit->closing, 'ins' );
1053 }
1054 }
1055 $lines = $closing->getLines();
1056
1057 return $lines;
1058 }
1059
1060 }