Merge "Documentation: Remove paragraph about not creating a 2nd WebRequest"
[lhc/web/wiklou.git] / includes / diff / DairikiDiff.php
1 <?php
2 /**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6 * You may copy this code freely under the conditions of the GPL.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup DifferenceEngine
25 * @defgroup DifferenceEngine DifferenceEngine
26 */
27
28 /**
29 * The base class for all other DiffOp classes.
30 *
31 * The classes that extend DiffOp are: DiffOpCopy, DiffOpDelete, DiffOpAdd and
32 * DiffOpChange. FakeDiffOp also extends DiffOp, but it is not located in this file.
33 *
34 * @private
35 * @ingroup DifferenceEngine
36 */
37 abstract class DiffOp {
38
39 /**
40 * @var string
41 */
42 public $type;
43
44 /**
45 * @var string[]
46 */
47 public $orig;
48
49 /**
50 * @var string[]
51 */
52 public $closing;
53
54 /**
55 * @return string
56 */
57 public function getType() {
58 return $this->type;
59 }
60
61 /**
62 * @return string[]
63 */
64 public function getOrig() {
65 return $this->orig;
66 }
67
68 /**
69 * @param int $i
70 * @return string|null
71 */
72 public function getClosing( $i = null ) {
73 if ( $i === null ) {
74 return $this->closing;
75 }
76 if ( array_key_exists( $i, $this->closing ) ) {
77 return $this->closing[$i];
78 }
79 return null;
80 }
81
82 abstract public function reverse();
83
84 /**
85 * @return int
86 */
87 public function norig() {
88 return $this->orig ? count( $this->orig ) : 0;
89 }
90
91 /**
92 * @return int
93 */
94 public function nclosing() {
95 return $this->closing ? count( $this->closing ) : 0;
96 }
97 }
98
99 /**
100 * Extends DiffOp. Used to mark strings that have been
101 * copied from one string array to the other.
102 *
103 * @private
104 * @ingroup DifferenceEngine
105 */
106 class DiffOpCopy extends DiffOp {
107 public $type = 'copy';
108
109 public function __construct( $orig, $closing = false ) {
110 if ( !is_array( $closing ) ) {
111 $closing = $orig;
112 }
113 $this->orig = $orig;
114 $this->closing = $closing;
115 }
116
117 /**
118 * @return DiffOpCopy
119 */
120 public function reverse() {
121 return new DiffOpCopy( $this->closing, $this->orig );
122 }
123 }
124
125 /**
126 * Extends DiffOp. Used to mark strings that have been
127 * deleted from the first string array.
128 *
129 * @private
130 * @ingroup DifferenceEngine
131 */
132 class DiffOpDelete extends DiffOp {
133 public $type = 'delete';
134
135 public function __construct( $lines ) {
136 $this->orig = $lines;
137 $this->closing = false;
138 }
139
140 /**
141 * @return DiffOpAdd
142 */
143 public function reverse() {
144 return new DiffOpAdd( $this->orig );
145 }
146 }
147
148 /**
149 * Extends DiffOp. Used to mark strings that have been
150 * added from the first string array.
151 *
152 * @private
153 * @ingroup DifferenceEngine
154 */
155 class DiffOpAdd extends DiffOp {
156 public $type = 'add';
157
158 public function __construct( $lines ) {
159 $this->closing = $lines;
160 $this->orig = false;
161 }
162
163 /**
164 * @return DiffOpDelete
165 */
166 public function reverse() {
167 return new DiffOpDelete( $this->closing );
168 }
169 }
170
171 /**
172 * Extends DiffOp. Used to mark strings that have been
173 * changed from the first string array (both added and subtracted).
174 *
175 * @private
176 * @ingroup DifferenceEngine
177 */
178 class DiffOpChange extends DiffOp {
179 public $type = 'change';
180
181 public function __construct( $orig, $closing ) {
182 $this->orig = $orig;
183 $this->closing = $closing;
184 }
185
186 /**
187 * @return DiffOpChange
188 */
189 public function reverse() {
190 return new DiffOpChange( $this->closing, $this->orig );
191 }
192 }
193
194 /**
195 * Class used internally by Diff to actually compute the diffs.
196 *
197 * The algorithm used here is mostly lifted from the perl module
198 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
199 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
200 *
201 * More ideas are taken from:
202 * http://www.ics.uci.edu/~eppstein/161/960229.html
203 *
204 * Some ideas (and a bit of code) are from analyze.c, from GNU
205 * diffutils-2.7, which can be found at:
206 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
207 *
208 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
209 * are my own.
210 *
211 * Line length limits for robustness added by Tim Starling, 2005-08-31
212 * Alternative implementation added by Guy Van den Broeck, 2008-07-30
213 *
214 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
215 * @private
216 * @ingroup DifferenceEngine
217 */
218 class DiffEngine {
219 const MAX_XREF_LENGTH = 10000;
220
221 protected $xchanged, $ychanged;
222
223 protected $xv = array(), $yv = array();
224 protected $xind = array(), $yind = array();
225
226 protected $seq = array(), $in_seq = array();
227
228 protected $lcs = 0;
229
230 /**
231 * @param string[] $from_lines
232 * @param string[] $to_lines
233 *
234 * @return DiffOp[]
235 */
236 public function diff( $from_lines, $to_lines ) {
237
238 // Diff and store locally
239 $this->diffLocal( $from_lines, $to_lines );
240
241 // Merge edits when possible
242 $this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
243 $this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );
244
245 // Compute the edit operations.
246 $n_from = count( $from_lines );
247 $n_to = count( $to_lines );
248
249 $edits = array();
250 $xi = $yi = 0;
251 while ( $xi < $n_from || $yi < $n_to ) {
252 assert( '$yi < $n_to || $this->xchanged[$xi]' );
253 assert( '$xi < $n_from || $this->ychanged[$yi]' );
254
255 // Skip matching "snake".
256 $copy = array();
257 while ( $xi < $n_from && $yi < $n_to
258 && !$this->xchanged[$xi] && !$this->ychanged[$yi]
259 ) {
260 $copy[] = $from_lines[$xi++];
261 ++$yi;
262 }
263 if ( $copy ) {
264 $edits[] = new DiffOpCopy( $copy );
265 }
266
267 // Find deletes & adds.
268 $delete = array();
269 while ( $xi < $n_from && $this->xchanged[$xi] ) {
270 $delete[] = $from_lines[$xi++];
271 }
272
273 $add = array();
274 while ( $yi < $n_to && $this->ychanged[$yi] ) {
275 $add[] = $to_lines[$yi++];
276 }
277
278 if ( $delete && $add ) {
279 $edits[] = new DiffOpChange( $delete, $add );
280 } elseif ( $delete ) {
281 $edits[] = new DiffOpDelete( $delete );
282 } elseif ( $add ) {
283 $edits[] = new DiffOpAdd( $add );
284 }
285 }
286
287 return $edits;
288 }
289
290 /**
291 * @param string[] $from_lines
292 * @param string[] $to_lines
293 */
294 private function diffLocal( $from_lines, $to_lines ) {
295 global $wgExternalDiffEngine;
296
297 if ( $wgExternalDiffEngine == 'wikidiff3' ) {
298 // wikidiff3
299 $wikidiff3 = new WikiDiff3();
300 $wikidiff3->diff( $from_lines, $to_lines );
301 $this->xchanged = $wikidiff3->removed;
302 $this->ychanged = $wikidiff3->added;
303 unset( $wikidiff3 );
304 } else {
305 // old diff
306 $n_from = count( $from_lines );
307 $n_to = count( $to_lines );
308 $this->xchanged = $this->ychanged = array();
309 $this->xv = $this->yv = array();
310 $this->xind = $this->yind = array();
311 $this->seq = array();
312 $this->in_seq = array();
313 $this->lcs = 0;
314
315 // Skip leading common lines.
316 for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
317 if ( $from_lines[$skip] !== $to_lines[$skip] ) {
318 break;
319 }
320 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
321 }
322 // Skip trailing common lines.
323 $xi = $n_from;
324 $yi = $n_to;
325 for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
326 if ( $from_lines[$xi] !== $to_lines[$yi] ) {
327 break;
328 }
329 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
330 }
331
332 // Ignore lines which do not exist in both files.
333 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
334 $xhash[$this->lineHash( $from_lines[$xi] )] = 1;
335 }
336
337 for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
338 $line = $to_lines[$yi];
339 $this->ychanged[$yi] = empty( $xhash[$this->lineHash( $line )] );
340 if ( $this->ychanged[$yi] ) {
341 continue;
342 }
343 $yhash[$this->lineHash( $line )] = 1;
344 $this->yv[] = $line;
345 $this->yind[] = $yi;
346 }
347 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
348 $line = $from_lines[$xi];
349 $this->xchanged[$xi] = empty( $yhash[$this->lineHash( $line )] );
350 if ( $this->xchanged[$xi] ) {
351 continue;
352 }
353 $this->xv[] = $line;
354 $this->xind[] = $xi;
355 }
356
357 // Find the LCS.
358 $this->compareSeq( 0, count( $this->xv ), 0, count( $this->yv ) );
359 }
360 }
361
362 /**
363 * Returns the whole line if it's small enough, or the MD5 hash otherwise
364 *
365 * @param string $line
366 *
367 * @return string
368 */
369 private function lineHash( $line ) {
370 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
371 return md5( $line );
372 } else {
373 return $line;
374 }
375 }
376
377 /**
378 * Divide the Largest Common Subsequence (LCS) of the sequences
379 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
380 * sized segments.
381 *
382 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
383 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
384 * sub sequences. The first sub-sequence is contained in [X0, X1),
385 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
386 * that (X0, Y0) == (XOFF, YOFF) and
387 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
388 *
389 * This function assumes that the first lines of the specified portions
390 * of the two files do not match, and likewise that the last lines do not
391 * match. The caller must trim matching lines from the beginning and end
392 * of the portions it is going to specify.
393 *
394 * @param int $xoff
395 * @param int $xlim
396 * @param int $yoff
397 * @param int $ylim
398 * @param int $nchunks
399 *
400 * @return array List of two elements, integer and array[].
401 */
402 private function diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
403 $flip = false;
404
405 if ( $xlim - $xoff > $ylim - $yoff ) {
406 // Things seems faster (I'm not sure I understand why)
407 // when the shortest sequence in X.
408 $flip = true;
409 list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
410 }
411
412 if ( $flip ) {
413 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
414 $ymatches[$this->xv[$i]][] = $i;
415 }
416 } else {
417 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
418 $ymatches[$this->yv[$i]][] = $i;
419 }
420 }
421
422 $this->lcs = 0;
423 $this->seq[0] = $yoff - 1;
424 $this->in_seq = array();
425 $ymids[0] = array();
426
427 $numer = $xlim - $xoff + $nchunks - 1;
428 $x = $xoff;
429 for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
430 if ( $chunk > 0 ) {
431 for ( $i = 0; $i <= $this->lcs; $i++ ) {
432 $ymids[$i][$chunk - 1] = $this->seq[$i];
433 }
434 }
435
436 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $chunk ) / $nchunks );
437 // @codingStandardsIgnoreStart Ignore Squiz.WhiteSpace.SemicolonSpacing.Incorrect
438 for ( ; $x < $x1; $x++ ) {
439 // @codingStandardsIgnoreEnd
440 $line = $flip ? $this->yv[$x] : $this->xv[$x];
441 if ( empty( $ymatches[$line] ) ) {
442 continue;
443 }
444
445 $k = 0;
446 $matches = $ymatches[$line];
447 reset( $matches );
448 while ( list( , $y ) = each( $matches ) ) {
449 if ( empty( $this->in_seq[$y] ) ) {
450 $k = $this->lcsPos( $y );
451 assert( '$k > 0' );
452 $ymids[$k] = $ymids[$k - 1];
453 break;
454 }
455 }
456
457 while ( list( , $y ) = each( $matches ) ) {
458 if ( $y > $this->seq[$k - 1] ) {
459 assert( '$y < $this->seq[$k]' );
460 // Optimization: this is a common case:
461 // next match is just replacing previous match.
462 $this->in_seq[$this->seq[$k]] = false;
463 $this->seq[$k] = $y;
464 $this->in_seq[$y] = 1;
465 } elseif ( empty( $this->in_seq[$y] ) ) {
466 $k = $this->lcsPos( $y );
467 assert( '$k > 0' );
468 $ymids[$k] = $ymids[$k - 1];
469 }
470 }
471 }
472 }
473
474 $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
475 $ymid = $ymids[$this->lcs];
476 for ( $n = 0; $n < $nchunks - 1; $n++ ) {
477 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
478 $y1 = $ymid[$n] + 1;
479 $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
480 }
481 $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
482
483 return array( $this->lcs, $seps );
484 }
485
486 /**
487 * @param int $ypos
488 *
489 * @return int
490 */
491 private function lcsPos( $ypos ) {
492 $end = $this->lcs;
493 if ( $end == 0 || $ypos > $this->seq[$end] ) {
494 $this->seq[++$this->lcs] = $ypos;
495 $this->in_seq[$ypos] = 1;
496
497 return $this->lcs;
498 }
499
500 $beg = 1;
501 while ( $beg < $end ) {
502 $mid = (int)( ( $beg + $end ) / 2 );
503 if ( $ypos > $this->seq[$mid] ) {
504 $beg = $mid + 1;
505 } else {
506 $end = $mid;
507 }
508 }
509
510 assert( '$ypos != $this->seq[$end]' );
511
512 $this->in_seq[$this->seq[$end]] = false;
513 $this->seq[$end] = $ypos;
514 $this->in_seq[$ypos] = 1;
515
516 return $end;
517 }
518
519 /**
520 * Find LCS of two sequences.
521 *
522 * The results are recorded in the vectors $this->{x,y}changed[], by
523 * storing a 1 in the element for each line that is an insertion
524 * or deletion (ie. is not in the LCS).
525 *
526 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
527 *
528 * Note that XLIM, YLIM are exclusive bounds.
529 * All line numbers are origin-0 and discarded lines are not counted.
530 *
531 * @param int $xoff
532 * @param int $xlim
533 * @param int $yoff
534 * @param int $ylim
535 */
536 private function compareSeq( $xoff, $xlim, $yoff, $ylim ) {
537 // Slide down the bottom initial diagonal.
538 while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff] ) {
539 ++$xoff;
540 ++$yoff;
541 }
542
543 // Slide up the top initial diagonal.
544 while ( $xlim > $xoff && $ylim > $yoff
545 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]
546 ) {
547 --$xlim;
548 --$ylim;
549 }
550
551 if ( $xoff == $xlim || $yoff == $ylim ) {
552 $lcs = 0;
553 } else {
554 // This is ad hoc but seems to work well.
555 // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
556 // $nchunks = max(2,min(8,(int)$nchunks));
557 $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
558 list( $lcs, $seps ) = $this->diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
559 }
560
561 if ( $lcs == 0 ) {
562 // X and Y sequences have no common subsequence:
563 // mark all changed.
564 while ( $yoff < $ylim ) {
565 $this->ychanged[$this->yind[$yoff++]] = 1;
566 }
567 while ( $xoff < $xlim ) {
568 $this->xchanged[$this->xind[$xoff++]] = 1;
569 }
570 } else {
571 // Use the partitions to split this problem into subproblems.
572 reset( $seps );
573 $pt1 = $seps[0];
574 while ( $pt2 = next( $seps ) ) {
575 $this->compareSeq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
576 $pt1 = $pt2;
577 }
578 }
579 }
580
581 /**
582 * Adjust inserts/deletes of identical lines to join changes
583 * as much as possible.
584 *
585 * We do something when a run of changed lines include a
586 * line at one end and has an excluded, identical line at the other.
587 * We are free to choose which identical line is included.
588 * `compareseq' usually chooses the one at the beginning,
589 * but usually it is cleaner to consider the following identical line
590 * to be the "change".
591 *
592 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
593 */
594 private function shiftBoundaries( $lines, &$changed, $other_changed ) {
595 $i = 0;
596 $j = 0;
597
598 assert( 'count($lines) == count($changed)' );
599 $len = count( $lines );
600 $other_len = count( $other_changed );
601
602 while ( 1 ) {
603 /*
604 * Scan forwards to find beginning of another run of changes.
605 * Also keep track of the corresponding point in the other file.
606 *
607 * Throughout this code, $i and $j are adjusted together so that
608 * the first $i elements of $changed and the first $j elements
609 * of $other_changed both contain the same number of zeros
610 * (unchanged lines).
611 * Furthermore, $j is always kept so that $j == $other_len or
612 * $other_changed[$j] == false.
613 */
614 while ( $j < $other_len && $other_changed[$j] ) {
615 $j++;
616 }
617
618 while ( $i < $len && !$changed[$i] ) {
619 assert( '$j < $other_len && ! $other_changed[$j]' );
620 $i++;
621 $j++;
622 while ( $j < $other_len && $other_changed[$j] ) {
623 $j++;
624 }
625 }
626
627 if ( $i == $len ) {
628 break;
629 }
630
631 $start = $i;
632
633 // Find the end of this run of changes.
634 while ( ++$i < $len && $changed[$i] ) {
635 continue;
636 }
637
638 do {
639 /*
640 * Record the length of this run of changes, so that
641 * we can later determine whether the run has grown.
642 */
643 $runlength = $i - $start;
644
645 /*
646 * Move the changed region back, so long as the
647 * previous unchanged line matches the last changed one.
648 * This merges with previous changed regions.
649 */
650 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
651 $changed[--$start] = 1;
652 $changed[--$i] = false;
653 while ( $start > 0 && $changed[$start - 1] ) {
654 $start--;
655 }
656 assert( '$j > 0' );
657 while ( $other_changed[--$j] ) {
658 continue;
659 }
660 assert( '$j >= 0 && !$other_changed[$j]' );
661 }
662
663 /*
664 * Set CORRESPONDING to the end of the changed run, at the last
665 * point where it corresponds to a changed run in the other file.
666 * CORRESPONDING == LEN means no such point has been found.
667 */
668 $corresponding = $j < $other_len ? $i : $len;
669
670 /*
671 * Move the changed region forward, so long as the
672 * first changed line matches the following unchanged one.
673 * This merges with following changed regions.
674 * Do this second, so that if there are no merges,
675 * the changed region is moved forward as far as possible.
676 */
677 while ( $i < $len && $lines[$start] == $lines[$i] ) {
678 $changed[$start++] = false;
679 $changed[$i++] = 1;
680 while ( $i < $len && $changed[$i] ) {
681 $i++;
682 }
683
684 assert( '$j < $other_len && ! $other_changed[$j]' );
685 $j++;
686 if ( $j < $other_len && $other_changed[$j] ) {
687 $corresponding = $i;
688 while ( $j < $other_len && $other_changed[$j] ) {
689 $j++;
690 }
691 }
692 }
693 } while ( $runlength != $i - $start );
694
695 /*
696 * If possible, move the fully-merged run of changes
697 * back to a corresponding run in the other file.
698 */
699 while ( $corresponding < $i ) {
700 $changed[--$start] = 1;
701 $changed[--$i] = 0;
702 assert( '$j > 0' );
703 while ( $other_changed[--$j] ) {
704 continue;
705 }
706 assert( '$j >= 0 && !$other_changed[$j]' );
707 }
708 }
709 }
710 }
711
712 /**
713 * Class representing a 'diff' between two sequences of strings.
714 * @todo document
715 * @private
716 * @ingroup DifferenceEngine
717 */
718 class Diff {
719
720 /**
721 * @var DiffOp[]
722 */
723 public $edits;
724
725 /**
726 * Constructor.
727 * Computes diff between sequences of strings.
728 *
729 * @param string[] $from_lines An array of strings.
730 * Typically these are lines from a file.
731 * @param string[] $to_lines An array of strings.
732 */
733 public function __construct( $from_lines, $to_lines ) {
734 $eng = new DiffEngine;
735 $this->edits = $eng->diff( $from_lines, $to_lines );
736 }
737
738 /**
739 * @return DiffOp[]
740 */
741 public function getEdits() {
742 return $this->edits;
743 }
744
745 /**
746 * Compute reversed Diff.
747 *
748 * SYNOPSIS:
749 *
750 * $diff = new Diff($lines1, $lines2);
751 * $rev = $diff->reverse();
752 *
753 * @return Object A Diff object representing the inverse of the
754 * original diff.
755 */
756 public function reverse() {
757 $rev = $this;
758 $rev->edits = array();
759 /** @var DiffOp $edit */
760 foreach ( $this->edits as $edit ) {
761 $rev->edits[] = $edit->reverse();
762 }
763
764 return $rev;
765 }
766
767 /**
768 * Check for empty diff.
769 *
770 * @return bool True if two sequences were identical.
771 */
772 public function isEmpty() {
773 foreach ( $this->edits as $edit ) {
774 if ( $edit->type != 'copy' ) {
775 return false;
776 }
777 }
778
779 return true;
780 }
781
782 /**
783 * Compute the length of the Longest Common Subsequence (LCS).
784 *
785 * This is mostly for diagnostic purposed.
786 *
787 * @return int The length of the LCS.
788 */
789 public function lcs() {
790 $lcs = 0;
791 foreach ( $this->edits as $edit ) {
792 if ( $edit->type == 'copy' ) {
793 $lcs += count( $edit->orig );
794 }
795 }
796
797 return $lcs;
798 }
799
800 /**
801 * Get the original set of lines.
802 *
803 * This reconstructs the $from_lines parameter passed to the
804 * constructor.
805 *
806 * @return string[] The original sequence of strings.
807 */
808 public function orig() {
809 $lines = array();
810
811 foreach ( $this->edits as $edit ) {
812 if ( $edit->orig ) {
813 array_splice( $lines, count( $lines ), 0, $edit->orig );
814 }
815 }
816
817 return $lines;
818 }
819
820 /**
821 * Get the closing set of lines.
822 *
823 * This reconstructs the $to_lines parameter passed to the
824 * constructor.
825 *
826 * @return string[] The sequence of strings.
827 */
828 public function closing() {
829 $lines = array();
830
831 foreach ( $this->edits as $edit ) {
832 if ( $edit->closing ) {
833 array_splice( $lines, count( $lines ), 0, $edit->closing );
834 }
835 }
836
837 return $lines;
838 }
839 }
840
841 /**
842 * @todo document, bad name.
843 * @private
844 * @ingroup DifferenceEngine
845 */
846 class MappedDiff extends Diff {
847 /**
848 * Constructor.
849 *
850 * Computes diff between sequences of strings.
851 *
852 * This can be used to compute things like
853 * case-insensitve diffs, or diffs which ignore
854 * changes in white-space.
855 *
856 * @param string[] $from_lines An array of strings.
857 * Typically these are lines from a file.
858 * @param string[] $to_lines An array of strings.
859 * @param string[] $mapped_from_lines This array should
860 * have the same size number of elements as $from_lines.
861 * The elements in $mapped_from_lines and
862 * $mapped_to_lines are what is actually compared
863 * when computing the diff.
864 * @param string[] $mapped_to_lines This array should
865 * have the same number of elements as $to_lines.
866 */
867 public function __construct( $from_lines, $to_lines,
868 $mapped_from_lines, $mapped_to_lines ) {
869
870 assert( 'count( $from_lines ) == count( $mapped_from_lines )' );
871 assert( 'count( $to_lines ) == count( $mapped_to_lines )' );
872
873 parent::__construct( $mapped_from_lines, $mapped_to_lines );
874
875 $xi = $yi = 0;
876 $editCount = count( $this->edits );
877 for ( $i = 0; $i < $editCount; $i++ ) {
878 $orig = &$this->edits[$i]->orig;
879 if ( is_array( $orig ) ) {
880 $orig = array_slice( $from_lines, $xi, count( $orig ) );
881 $xi += count( $orig );
882 }
883
884 $closing = &$this->edits[$i]->closing;
885 if ( is_array( $closing ) ) {
886 $closing = array_slice( $to_lines, $yi, count( $closing ) );
887 $yi += count( $closing );
888 }
889 }
890 }
891 }
892
893 /**
894 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
895 */
896
897 /**
898 * @todo document
899 * @private
900 * @ingroup DifferenceEngine
901 */
902 class HWLDFWordAccumulator {
903 public $insClass = ' class="diffchange diffchange-inline"';
904 public $delClass = ' class="diffchange diffchange-inline"';
905
906 private $lines = array();
907 private $line = '';
908 private $group = '';
909 private $tag = '';
910
911 /**
912 * @param string $new_tag
913 */
914 private function flushGroup( $new_tag ) {
915 if ( $this->group !== '' ) {
916 if ( $this->tag == 'ins' ) {
917 $this->line .= "<ins{$this->insClass}>" .
918 htmlspecialchars( $this->group ) . '</ins>';
919 } elseif ( $this->tag == 'del' ) {
920 $this->line .= "<del{$this->delClass}>" .
921 htmlspecialchars( $this->group ) . '</del>';
922 } else {
923 $this->line .= htmlspecialchars( $this->group );
924 }
925 }
926 $this->group = '';
927 $this->tag = $new_tag;
928 }
929
930 /**
931 * @param string $new_tag
932 */
933 private function flushLine( $new_tag ) {
934 $this->flushGroup( $new_tag );
935 if ( $this->line != '' ) {
936 array_push( $this->lines, $this->line );
937 } else {
938 # make empty lines visible by inserting an NBSP
939 array_push( $this->lines, '&#160;' );
940 }
941 $this->line = '';
942 }
943
944 /**
945 * @param string[] $words
946 * @param string $tag
947 */
948 public function addWords( $words, $tag = '' ) {
949 if ( $tag != $this->tag ) {
950 $this->flushGroup( $tag );
951 }
952
953 foreach ( $words as $word ) {
954 // new-line should only come as first char of word.
955 if ( $word == '' ) {
956 continue;
957 }
958 if ( $word[0] == "\n" ) {
959 $this->flushLine( $tag );
960 $word = substr( $word, 1 );
961 }
962 assert( '!strstr( $word, "\n" )' );
963 $this->group .= $word;
964 }
965 }
966
967 /**
968 * @return string[]
969 */
970 public function getLines() {
971 $this->flushLine( '~done' );
972
973 return $this->lines;
974 }
975 }
976
977 /**
978 * @todo document
979 * @private
980 * @ingroup DifferenceEngine
981 */
982 class WordLevelDiff extends MappedDiff {
983 const MAX_LINE_LENGTH = 10000;
984
985 /**
986 * @param string[] $orig_lines
987 * @param string[] $closing_lines
988 */
989 public function __construct( $orig_lines, $closing_lines ) {
990
991 list( $orig_words, $orig_stripped ) = $this->split( $orig_lines );
992 list( $closing_words, $closing_stripped ) = $this->split( $closing_lines );
993
994 parent::__construct( $orig_words, $closing_words,
995 $orig_stripped, $closing_stripped );
996 }
997
998 /**
999 * @param string[] $lines
1000 *
1001 * @return array[]
1002 */
1003 private function split( $lines ) {
1004
1005 $words = array();
1006 $stripped = array();
1007 $first = true;
1008 foreach ( $lines as $line ) {
1009 # If the line is too long, just pretend the entire line is one big word
1010 # This prevents resource exhaustion problems
1011 if ( $first ) {
1012 $first = false;
1013 } else {
1014 $words[] = "\n";
1015 $stripped[] = "\n";
1016 }
1017 if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
1018 $words[] = $line;
1019 $stripped[] = $line;
1020 } else {
1021 $m = array();
1022 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1023 $line, $m )
1024 ) {
1025 foreach ( $m[0] as $word ) {
1026 $words[] = $word;
1027 }
1028 foreach ( $m[1] as $stripped_word ) {
1029 $stripped[] = $stripped_word;
1030 }
1031 }
1032 }
1033 }
1034
1035 return array( $words, $stripped );
1036 }
1037
1038 /**
1039 * @return string[]
1040 */
1041 public function orig() {
1042 $orig = new HWLDFWordAccumulator;
1043
1044 foreach ( $this->edits as $edit ) {
1045 if ( $edit->type == 'copy' ) {
1046 $orig->addWords( $edit->orig );
1047 } elseif ( $edit->orig ) {
1048 $orig->addWords( $edit->orig, 'del' );
1049 }
1050 }
1051 $lines = $orig->getLines();
1052
1053 return $lines;
1054 }
1055
1056 /**
1057 * @return string[]
1058 */
1059 public function closing() {
1060 $closing = new HWLDFWordAccumulator;
1061
1062 foreach ( $this->edits as $edit ) {
1063 if ( $edit->type == 'copy' ) {
1064 $closing->addWords( $edit->closing );
1065 } elseif ( $edit->closing ) {
1066 $closing->addWords( $edit->closing, 'ins' );
1067 }
1068 }
1069 $lines = $closing->getLines();
1070
1071 return $lines;
1072 }
1073
1074 }