Fix "Undefined property: DiffEngine::$seq" under HHVM in DairikiDiff.php
[lhc/web/wiklou.git] / includes / diff / DairikiDiff.php
1 <?php
2 /**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6 * You may copy this code freely under the conditions of the GPL.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup DifferenceEngine
25 * @defgroup DifferenceEngine DifferenceEngine
26 */
27
28 /**
29 * @todo document
30 * @private
31 * @ingroup DifferenceEngine
32 */
33 abstract class DiffOp {
34
35 /**
36 * @var string
37 */
38 public $type;
39
40 /**
41 * @var string[]
42 */
43 public $orig;
44
45 /**
46 * @var string[]
47 */
48 public $closing;
49
50 /**
51 * @return string
52 */
53 public function getType() {
54 return $this->type;
55 }
56
57 /**
58 * @return string[]
59 */
60 public function getOrig() {
61 return $this->orig;
62 }
63
64 /**
65 * @param int $i
66 * @return string|null
67 */
68 public function getClosing( $i = null ) {
69 if ( $i === null ) {
70 return $this->closing;
71 }
72 if ( array_key_exists( $i, $this->closing ) ) {
73 return $this->closing[$i];
74 }
75 return null;
76 }
77
78 abstract public function reverse();
79
80 /**
81 * @return int
82 */
83 public function norig() {
84 return $this->orig ? count( $this->orig ) : 0;
85 }
86
87 /**
88 * @return int
89 */
90 public function nclosing() {
91 return $this->closing ? count( $this->closing ) : 0;
92 }
93 }
94
95 /**
96 * @todo document
97 * @private
98 * @ingroup DifferenceEngine
99 */
100 class DiffOpCopy extends DiffOp {
101 public $type = 'copy';
102
103 public function __construct( $orig, $closing = false ) {
104 if ( !is_array( $closing ) ) {
105 $closing = $orig;
106 }
107 $this->orig = $orig;
108 $this->closing = $closing;
109 }
110
111 /**
112 * @return DiffOpCopy
113 */
114 public function reverse() {
115 return new DiffOpCopy( $this->closing, $this->orig );
116 }
117 }
118
119 /**
120 * @todo document
121 * @private
122 * @ingroup DifferenceEngine
123 */
124 class DiffOpDelete extends DiffOp {
125 public $type = 'delete';
126
127 public function __construct( $lines ) {
128 $this->orig = $lines;
129 $this->closing = false;
130 }
131
132 /**
133 * @return DiffOpAdd
134 */
135 public function reverse() {
136 return new DiffOpAdd( $this->orig );
137 }
138 }
139
140 /**
141 * @todo document
142 * @private
143 * @ingroup DifferenceEngine
144 */
145 class DiffOpAdd extends DiffOp {
146 public $type = 'add';
147
148 public function __construct( $lines ) {
149 $this->closing = $lines;
150 $this->orig = false;
151 }
152
153 /**
154 * @return DiffOpDelete
155 */
156 public function reverse() {
157 return new DiffOpDelete( $this->closing );
158 }
159 }
160
161 /**
162 * @todo document
163 * @private
164 * @ingroup DifferenceEngine
165 */
166 class DiffOpChange extends DiffOp {
167 public $type = 'change';
168
169 public function __construct( $orig, $closing ) {
170 $this->orig = $orig;
171 $this->closing = $closing;
172 }
173
174 /**
175 * @return DiffOpChange
176 */
177 public function reverse() {
178 return new DiffOpChange( $this->closing, $this->orig );
179 }
180 }
181
182 /**
183 * Class used internally by Diff to actually compute the diffs.
184 *
185 * The algorithm used here is mostly lifted from the perl module
186 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
187 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
188 *
189 * More ideas are taken from:
190 * http://www.ics.uci.edu/~eppstein/161/960229.html
191 *
192 * Some ideas (and a bit of code) are from analyze.c, from GNU
193 * diffutils-2.7, which can be found at:
194 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
195 *
196 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
197 * are my own.
198 *
199 * Line length limits for robustness added by Tim Starling, 2005-08-31
200 * Alternative implementation added by Guy Van den Broeck, 2008-07-30
201 *
202 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
203 * @private
204 * @ingroup DifferenceEngine
205 */
206 class DiffEngine {
207 const MAX_XREF_LENGTH = 10000;
208
209 protected $xchanged, $ychanged;
210
211 protected $xv = array(), $yv = array();
212 protected $xind = array(), $yind = array();
213
214 protected $seq = array(), $in_seq = array();
215
216 protected $lcs = 0;
217
218 /**
219 * @param string[] $from_lines
220 * @param string[] $to_lines
221 *
222 * @return DiffOp[]
223 */
224 public function diff( $from_lines, $to_lines ) {
225
226 // Diff and store locally
227 $this->diffLocal( $from_lines, $to_lines );
228
229 // Merge edits when possible
230 $this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
231 $this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );
232
233 // Compute the edit operations.
234 $n_from = count( $from_lines );
235 $n_to = count( $to_lines );
236
237 $edits = array();
238 $xi = $yi = 0;
239 while ( $xi < $n_from || $yi < $n_to ) {
240 assert( '$yi < $n_to || $this->xchanged[$xi]' );
241 assert( '$xi < $n_from || $this->ychanged[$yi]' );
242
243 // Skip matching "snake".
244 $copy = array();
245 while ( $xi < $n_from && $yi < $n_to
246 && !$this->xchanged[$xi] && !$this->ychanged[$yi]
247 ) {
248 $copy[] = $from_lines[$xi++];
249 ++$yi;
250 }
251 if ( $copy ) {
252 $edits[] = new DiffOpCopy( $copy );
253 }
254
255 // Find deletes & adds.
256 $delete = array();
257 while ( $xi < $n_from && $this->xchanged[$xi] ) {
258 $delete[] = $from_lines[$xi++];
259 }
260
261 $add = array();
262 while ( $yi < $n_to && $this->ychanged[$yi] ) {
263 $add[] = $to_lines[$yi++];
264 }
265
266 if ( $delete && $add ) {
267 $edits[] = new DiffOpChange( $delete, $add );
268 } elseif ( $delete ) {
269 $edits[] = new DiffOpDelete( $delete );
270 } elseif ( $add ) {
271 $edits[] = new DiffOpAdd( $add );
272 }
273 }
274
275 return $edits;
276 }
277
278 /**
279 * @param string[] $from_lines
280 * @param string[] $to_lines
281 */
282 private function diffLocal( $from_lines, $to_lines ) {
283 global $wgExternalDiffEngine;
284
285 if ( $wgExternalDiffEngine == 'wikidiff3' ) {
286 // wikidiff3
287 $wikidiff3 = new WikiDiff3();
288 $wikidiff3->diff( $from_lines, $to_lines );
289 $this->xchanged = $wikidiff3->removed;
290 $this->ychanged = $wikidiff3->added;
291 unset( $wikidiff3 );
292 } else {
293 // old diff
294 $n_from = count( $from_lines );
295 $n_to = count( $to_lines );
296 $this->xchanged = $this->ychanged = array();
297 $this->xv = $this->yv = array();
298 $this->xind = $this->yind = array();
299 $this->seq = array();
300 $this->in_seq = array();
301 $this->lcs = 0;
302
303 // Skip leading common lines.
304 for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
305 if ( $from_lines[$skip] !== $to_lines[$skip] ) {
306 break;
307 }
308 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
309 }
310 // Skip trailing common lines.
311 $xi = $n_from;
312 $yi = $n_to;
313 for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
314 if ( $from_lines[$xi] !== $to_lines[$yi] ) {
315 break;
316 }
317 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
318 }
319
320 // Ignore lines which do not exist in both files.
321 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
322 $xhash[$this->lineHash( $from_lines[$xi] )] = 1;
323 }
324
325 for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
326 $line = $to_lines[$yi];
327 $this->ychanged[$yi] = empty( $xhash[$this->lineHash( $line )] );
328 if ( $this->ychanged[$yi] ) {
329 continue;
330 }
331 $yhash[$this->lineHash( $line )] = 1;
332 $this->yv[] = $line;
333 $this->yind[] = $yi;
334 }
335 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
336 $line = $from_lines[$xi];
337 $this->xchanged[$xi] = empty( $yhash[$this->lineHash( $line )] );
338 if ( $this->xchanged[$xi] ) {
339 continue;
340 }
341 $this->xv[] = $line;
342 $this->xind[] = $xi;
343 }
344
345 // Find the LCS.
346 $this->compareSeq( 0, count( $this->xv ), 0, count( $this->yv ) );
347 }
348 }
349
350 /**
351 * Returns the whole line if it's small enough, or the MD5 hash otherwise
352 *
353 * @param string $line
354 *
355 * @return string
356 */
357 private function lineHash( $line ) {
358 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
359 return md5( $line );
360 } else {
361 return $line;
362 }
363 }
364
365 /**
366 * Divide the Largest Common Subsequence (LCS) of the sequences
367 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
368 * sized segments.
369 *
370 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
371 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
372 * sub sequences. The first sub-sequence is contained in [X0, X1),
373 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
374 * that (X0, Y0) == (XOFF, YOFF) and
375 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
376 *
377 * This function assumes that the first lines of the specified portions
378 * of the two files do not match, and likewise that the last lines do not
379 * match. The caller must trim matching lines from the beginning and end
380 * of the portions it is going to specify.
381 *
382 * @param int $xoff
383 * @param int $xlim
384 * @param int $yoff
385 * @param int $ylim
386 * @param int $nchunks
387 *
388 * @return array List of two elements, integer and array[].
389 */
390 private function diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
391 $flip = false;
392
393 if ( $xlim - $xoff > $ylim - $yoff ) {
394 // Things seems faster (I'm not sure I understand why)
395 // when the shortest sequence in X.
396 $flip = true;
397 list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
398 }
399
400 if ( $flip ) {
401 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
402 $ymatches[$this->xv[$i]][] = $i;
403 }
404 } else {
405 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
406 $ymatches[$this->yv[$i]][] = $i;
407 }
408 }
409
410 $this->lcs = 0;
411 $this->seq[0] = $yoff - 1;
412 $this->in_seq = array();
413 $ymids[0] = array();
414
415 $numer = $xlim - $xoff + $nchunks - 1;
416 $x = $xoff;
417 for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
418 if ( $chunk > 0 ) {
419 for ( $i = 0; $i <= $this->lcs; $i++ ) {
420 $ymids[$i][$chunk - 1] = $this->seq[$i];
421 }
422 }
423
424 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $chunk ) / $nchunks );
425 // @codingStandardsIgnoreStart Ignore Squiz.WhiteSpace.SemicolonSpacing.Incorrect
426 for ( ; $x < $x1; $x++ ) {
427 // @codingStandardsIgnoreEnd
428 $line = $flip ? $this->yv[$x] : $this->xv[$x];
429 if ( empty( $ymatches[$line] ) ) {
430 continue;
431 }
432
433 $k = 0;
434 $matches = $ymatches[$line];
435 reset( $matches );
436 while ( list( , $y ) = each( $matches ) ) {
437 if ( empty( $this->in_seq[$y] ) ) {
438 $k = $this->lcsPos( $y );
439 assert( '$k > 0' );
440 $ymids[$k] = $ymids[$k - 1];
441 break;
442 }
443 }
444
445 while ( list( , $y ) = each( $matches ) ) {
446 if ( $y > $this->seq[$k - 1] ) {
447 assert( '$y < $this->seq[$k]' );
448 // Optimization: this is a common case:
449 // next match is just replacing previous match.
450 $this->in_seq[$this->seq[$k]] = false;
451 $this->seq[$k] = $y;
452 $this->in_seq[$y] = 1;
453 } elseif ( empty( $this->in_seq[$y] ) ) {
454 $k = $this->lcsPos( $y );
455 assert( '$k > 0' );
456 $ymids[$k] = $ymids[$k - 1];
457 }
458 }
459 }
460 }
461
462 $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
463 $ymid = $ymids[$this->lcs];
464 for ( $n = 0; $n < $nchunks - 1; $n++ ) {
465 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
466 $y1 = $ymid[$n] + 1;
467 $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
468 }
469 $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
470
471 return array( $this->lcs, $seps );
472 }
473
474 /**
475 * @param int $ypos
476 *
477 * @return int
478 */
479 private function lcsPos( $ypos ) {
480 $end = $this->lcs;
481 if ( $end == 0 || $ypos > $this->seq[$end] ) {
482 $this->seq[++$this->lcs] = $ypos;
483 $this->in_seq[$ypos] = 1;
484
485 return $this->lcs;
486 }
487
488 $beg = 1;
489 while ( $beg < $end ) {
490 $mid = (int)( ( $beg + $end ) / 2 );
491 if ( $ypos > $this->seq[$mid] ) {
492 $beg = $mid + 1;
493 } else {
494 $end = $mid;
495 }
496 }
497
498 assert( '$ypos != $this->seq[$end]' );
499
500 $this->in_seq[$this->seq[$end]] = false;
501 $this->seq[$end] = $ypos;
502 $this->in_seq[$ypos] = 1;
503
504 return $end;
505 }
506
507 /**
508 * Find LCS of two sequences.
509 *
510 * The results are recorded in the vectors $this->{x,y}changed[], by
511 * storing a 1 in the element for each line that is an insertion
512 * or deletion (ie. is not in the LCS).
513 *
514 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
515 *
516 * Note that XLIM, YLIM are exclusive bounds.
517 * All line numbers are origin-0 and discarded lines are not counted.
518 *
519 * @param int $xoff
520 * @param int $xlim
521 * @param int $yoff
522 * @param int $ylim
523 */
524 private function compareSeq( $xoff, $xlim, $yoff, $ylim ) {
525 // Slide down the bottom initial diagonal.
526 while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff] ) {
527 ++$xoff;
528 ++$yoff;
529 }
530
531 // Slide up the top initial diagonal.
532 while ( $xlim > $xoff && $ylim > $yoff
533 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]
534 ) {
535 --$xlim;
536 --$ylim;
537 }
538
539 if ( $xoff == $xlim || $yoff == $ylim ) {
540 $lcs = 0;
541 } else {
542 // This is ad hoc but seems to work well.
543 // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
544 // $nchunks = max(2,min(8,(int)$nchunks));
545 $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
546 list( $lcs, $seps ) = $this->diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
547 }
548
549 if ( $lcs == 0 ) {
550 // X and Y sequences have no common subsequence:
551 // mark all changed.
552 while ( $yoff < $ylim ) {
553 $this->ychanged[$this->yind[$yoff++]] = 1;
554 }
555 while ( $xoff < $xlim ) {
556 $this->xchanged[$this->xind[$xoff++]] = 1;
557 }
558 } else {
559 // Use the partitions to split this problem into subproblems.
560 reset( $seps );
561 $pt1 = $seps[0];
562 while ( $pt2 = next( $seps ) ) {
563 $this->compareSeq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
564 $pt1 = $pt2;
565 }
566 }
567 }
568
569 /**
570 * Adjust inserts/deletes of identical lines to join changes
571 * as much as possible.
572 *
573 * We do something when a run of changed lines include a
574 * line at one end and has an excluded, identical line at the other.
575 * We are free to choose which identical line is included.
576 * `compareseq' usually chooses the one at the beginning,
577 * but usually it is cleaner to consider the following identical line
578 * to be the "change".
579 *
580 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
581 */
582 private function shiftBoundaries( $lines, &$changed, $other_changed ) {
583 $i = 0;
584 $j = 0;
585
586 assert( 'count($lines) == count($changed)' );
587 $len = count( $lines );
588 $other_len = count( $other_changed );
589
590 while ( 1 ) {
591 /*
592 * Scan forwards to find beginning of another run of changes.
593 * Also keep track of the corresponding point in the other file.
594 *
595 * Throughout this code, $i and $j are adjusted together so that
596 * the first $i elements of $changed and the first $j elements
597 * of $other_changed both contain the same number of zeros
598 * (unchanged lines).
599 * Furthermore, $j is always kept so that $j == $other_len or
600 * $other_changed[$j] == false.
601 */
602 while ( $j < $other_len && $other_changed[$j] ) {
603 $j++;
604 }
605
606 while ( $i < $len && !$changed[$i] ) {
607 assert( '$j < $other_len && ! $other_changed[$j]' );
608 $i++;
609 $j++;
610 while ( $j < $other_len && $other_changed[$j] ) {
611 $j++;
612 }
613 }
614
615 if ( $i == $len ) {
616 break;
617 }
618
619 $start = $i;
620
621 // Find the end of this run of changes.
622 while ( ++$i < $len && $changed[$i] ) {
623 continue;
624 }
625
626 do {
627 /*
628 * Record the length of this run of changes, so that
629 * we can later determine whether the run has grown.
630 */
631 $runlength = $i - $start;
632
633 /*
634 * Move the changed region back, so long as the
635 * previous unchanged line matches the last changed one.
636 * This merges with previous changed regions.
637 */
638 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
639 $changed[--$start] = 1;
640 $changed[--$i] = false;
641 while ( $start > 0 && $changed[$start - 1] ) {
642 $start--;
643 }
644 assert( '$j > 0' );
645 while ( $other_changed[--$j] ) {
646 continue;
647 }
648 assert( '$j >= 0 && !$other_changed[$j]' );
649 }
650
651 /*
652 * Set CORRESPONDING to the end of the changed run, at the last
653 * point where it corresponds to a changed run in the other file.
654 * CORRESPONDING == LEN means no such point has been found.
655 */
656 $corresponding = $j < $other_len ? $i : $len;
657
658 /*
659 * Move the changed region forward, so long as the
660 * first changed line matches the following unchanged one.
661 * This merges with following changed regions.
662 * Do this second, so that if there are no merges,
663 * the changed region is moved forward as far as possible.
664 */
665 while ( $i < $len && $lines[$start] == $lines[$i] ) {
666 $changed[$start++] = false;
667 $changed[$i++] = 1;
668 while ( $i < $len && $changed[$i] ) {
669 $i++;
670 }
671
672 assert( '$j < $other_len && ! $other_changed[$j]' );
673 $j++;
674 if ( $j < $other_len && $other_changed[$j] ) {
675 $corresponding = $i;
676 while ( $j < $other_len && $other_changed[$j] ) {
677 $j++;
678 }
679 }
680 }
681 } while ( $runlength != $i - $start );
682
683 /*
684 * If possible, move the fully-merged run of changes
685 * back to a corresponding run in the other file.
686 */
687 while ( $corresponding < $i ) {
688 $changed[--$start] = 1;
689 $changed[--$i] = 0;
690 assert( '$j > 0' );
691 while ( $other_changed[--$j] ) {
692 continue;
693 }
694 assert( '$j >= 0 && !$other_changed[$j]' );
695 }
696 }
697 }
698 }
699
700 /**
701 * Class representing a 'diff' between two sequences of strings.
702 * @todo document
703 * @private
704 * @ingroup DifferenceEngine
705 */
706 class Diff {
707
708 /**
709 * @var DiffOp[]
710 */
711 public $edits;
712
713 /**
714 * Constructor.
715 * Computes diff between sequences of strings.
716 *
717 * @param string[] $from_lines An array of strings.
718 * Typically these are lines from a file.
719 * @param string[] $to_lines An array of strings.
720 */
721 public function __construct( $from_lines, $to_lines ) {
722 $eng = new DiffEngine;
723 $this->edits = $eng->diff( $from_lines, $to_lines );
724 }
725
726 /**
727 * @return DiffOp[]
728 */
729 public function getEdits() {
730 return $this->edits;
731 }
732
733 /**
734 * Compute reversed Diff.
735 *
736 * SYNOPSIS:
737 *
738 * $diff = new Diff($lines1, $lines2);
739 * $rev = $diff->reverse();
740 *
741 * @return Object A Diff object representing the inverse of the
742 * original diff.
743 */
744 public function reverse() {
745 $rev = $this;
746 $rev->edits = array();
747 /** @var DiffOp $edit */
748 foreach ( $this->edits as $edit ) {
749 $rev->edits[] = $edit->reverse();
750 }
751
752 return $rev;
753 }
754
755 /**
756 * Check for empty diff.
757 *
758 * @return bool True if two sequences were identical.
759 */
760 public function isEmpty() {
761 foreach ( $this->edits as $edit ) {
762 if ( $edit->type != 'copy' ) {
763 return false;
764 }
765 }
766
767 return true;
768 }
769
770 /**
771 * Compute the length of the Longest Common Subsequence (LCS).
772 *
773 * This is mostly for diagnostic purposed.
774 *
775 * @return int The length of the LCS.
776 */
777 public function lcs() {
778 $lcs = 0;
779 foreach ( $this->edits as $edit ) {
780 if ( $edit->type == 'copy' ) {
781 $lcs += count( $edit->orig );
782 }
783 }
784
785 return $lcs;
786 }
787
788 /**
789 * Get the original set of lines.
790 *
791 * This reconstructs the $from_lines parameter passed to the
792 * constructor.
793 *
794 * @return string[] The original sequence of strings.
795 */
796 public function orig() {
797 $lines = array();
798
799 foreach ( $this->edits as $edit ) {
800 if ( $edit->orig ) {
801 array_splice( $lines, count( $lines ), 0, $edit->orig );
802 }
803 }
804
805 return $lines;
806 }
807
808 /**
809 * Get the closing set of lines.
810 *
811 * This reconstructs the $to_lines parameter passed to the
812 * constructor.
813 *
814 * @return string[] The sequence of strings.
815 */
816 public function closing() {
817 $lines = array();
818
819 foreach ( $this->edits as $edit ) {
820 if ( $edit->closing ) {
821 array_splice( $lines, count( $lines ), 0, $edit->closing );
822 }
823 }
824
825 return $lines;
826 }
827 }
828
829 /**
830 * @todo document, bad name.
831 * @private
832 * @ingroup DifferenceEngine
833 */
834 class MappedDiff extends Diff {
835 /**
836 * Constructor.
837 *
838 * Computes diff between sequences of strings.
839 *
840 * This can be used to compute things like
841 * case-insensitve diffs, or diffs which ignore
842 * changes in white-space.
843 *
844 * @param string[] $from_lines An array of strings.
845 * Typically these are lines from a file.
846 * @param string[] $to_lines An array of strings.
847 * @param string[] $mapped_from_lines This array should
848 * have the same size number of elements as $from_lines.
849 * The elements in $mapped_from_lines and
850 * $mapped_to_lines are what is actually compared
851 * when computing the diff.
852 * @param string[] $mapped_to_lines This array should
853 * have the same number of elements as $to_lines.
854 */
855 public function __construct( $from_lines, $to_lines,
856 $mapped_from_lines, $mapped_to_lines ) {
857
858 assert( 'count( $from_lines ) == count( $mapped_from_lines )' );
859 assert( 'count( $to_lines ) == count( $mapped_to_lines )' );
860
861 parent::__construct( $mapped_from_lines, $mapped_to_lines );
862
863 $xi = $yi = 0;
864 $editCount = count( $this->edits );
865 for ( $i = 0; $i < $editCount; $i++ ) {
866 $orig = &$this->edits[$i]->orig;
867 if ( is_array( $orig ) ) {
868 $orig = array_slice( $from_lines, $xi, count( $orig ) );
869 $xi += count( $orig );
870 }
871
872 $closing = &$this->edits[$i]->closing;
873 if ( is_array( $closing ) ) {
874 $closing = array_slice( $to_lines, $yi, count( $closing ) );
875 $yi += count( $closing );
876 }
877 }
878 }
879 }
880
881 /**
882 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
883 */
884
885 /**
886 * @todo document
887 * @private
888 * @ingroup DifferenceEngine
889 */
890 class HWLDFWordAccumulator {
891 public $insClass = ' class="diffchange diffchange-inline"';
892 public $delClass = ' class="diffchange diffchange-inline"';
893
894 private $lines = array();
895 private $line = '';
896 private $group = '';
897 private $tag = '';
898
899 /**
900 * @param string $new_tag
901 */
902 private function flushGroup( $new_tag ) {
903 if ( $this->group !== '' ) {
904 if ( $this->tag == 'ins' ) {
905 $this->line .= "<ins{$this->insClass}>" .
906 htmlspecialchars( $this->group ) . '</ins>';
907 } elseif ( $this->tag == 'del' ) {
908 $this->line .= "<del{$this->delClass}>" .
909 htmlspecialchars( $this->group ) . '</del>';
910 } else {
911 $this->line .= htmlspecialchars( $this->group );
912 }
913 }
914 $this->group = '';
915 $this->tag = $new_tag;
916 }
917
918 /**
919 * @param string $new_tag
920 */
921 private function flushLine( $new_tag ) {
922 $this->flushGroup( $new_tag );
923 if ( $this->line != '' ) {
924 array_push( $this->lines, $this->line );
925 } else {
926 # make empty lines visible by inserting an NBSP
927 array_push( $this->lines, '&#160;' );
928 }
929 $this->line = '';
930 }
931
932 /**
933 * @param string[] $words
934 * @param string $tag
935 */
936 public function addWords( $words, $tag = '' ) {
937 if ( $tag != $this->tag ) {
938 $this->flushGroup( $tag );
939 }
940
941 foreach ( $words as $word ) {
942 // new-line should only come as first char of word.
943 if ( $word == '' ) {
944 continue;
945 }
946 if ( $word[0] == "\n" ) {
947 $this->flushLine( $tag );
948 $word = substr( $word, 1 );
949 }
950 assert( '!strstr( $word, "\n" )' );
951 $this->group .= $word;
952 }
953 }
954
955 /**
956 * @return string[]
957 */
958 public function getLines() {
959 $this->flushLine( '~done' );
960
961 return $this->lines;
962 }
963 }
964
965 /**
966 * @todo document
967 * @private
968 * @ingroup DifferenceEngine
969 */
970 class WordLevelDiff extends MappedDiff {
971 const MAX_LINE_LENGTH = 10000;
972
973 /**
974 * @param string[] $orig_lines
975 * @param string[] $closing_lines
976 */
977 public function __construct( $orig_lines, $closing_lines ) {
978
979 list( $orig_words, $orig_stripped ) = $this->split( $orig_lines );
980 list( $closing_words, $closing_stripped ) = $this->split( $closing_lines );
981
982 parent::__construct( $orig_words, $closing_words,
983 $orig_stripped, $closing_stripped );
984 }
985
986 /**
987 * @param string[] $lines
988 *
989 * @return array[]
990 */
991 private function split( $lines ) {
992
993 $words = array();
994 $stripped = array();
995 $first = true;
996 foreach ( $lines as $line ) {
997 # If the line is too long, just pretend the entire line is one big word
998 # This prevents resource exhaustion problems
999 if ( $first ) {
1000 $first = false;
1001 } else {
1002 $words[] = "\n";
1003 $stripped[] = "\n";
1004 }
1005 if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
1006 $words[] = $line;
1007 $stripped[] = $line;
1008 } else {
1009 $m = array();
1010 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1011 $line, $m )
1012 ) {
1013 foreach ( $m[0] as $word ) {
1014 $words[] = $word;
1015 }
1016 foreach ( $m[1] as $stripped_word ) {
1017 $stripped[] = $stripped_word;
1018 }
1019 }
1020 }
1021 }
1022
1023 return array( $words, $stripped );
1024 }
1025
1026 /**
1027 * @return string[]
1028 */
1029 public function orig() {
1030 $orig = new HWLDFWordAccumulator;
1031
1032 foreach ( $this->edits as $edit ) {
1033 if ( $edit->type == 'copy' ) {
1034 $orig->addWords( $edit->orig );
1035 } elseif ( $edit->orig ) {
1036 $orig->addWords( $edit->orig, 'del' );
1037 }
1038 }
1039 $lines = $orig->getLines();
1040
1041 return $lines;
1042 }
1043
1044 /**
1045 * @return string[]
1046 */
1047 public function closing() {
1048 $closing = new HWLDFWordAccumulator;
1049
1050 foreach ( $this->edits as $edit ) {
1051 if ( $edit->type == 'copy' ) {
1052 $closing->addWords( $edit->closing );
1053 } elseif ( $edit->closing ) {
1054 $closing->addWords( $edit->closing, 'ins' );
1055 }
1056 }
1057 $lines = $closing->getLines();
1058
1059 return $lines;
1060 }
1061
1062 }