Merge "Sort 'MediaWiki' resources definitions alphabetically"
[lhc/web/wiklou.git] / includes / diff / DairikiDiff.php
1 <?php
2 /**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6 * You may copy this code freely under the conditions of the GPL.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup DifferenceEngine
25 * @defgroup DifferenceEngine DifferenceEngine
26 */
27
/**
 * Base class for a single edit operation in a diff.
 *
 * An operation carries the affected lines of the original sequence in
 * $orig and the affected lines of the closing (new) sequence in $closing.
 * A side that does not take part in the operation is set to false by the
 * concrete subclasses.
 *
 * @private
 * @ingroup DifferenceEngine
 */
abstract class DiffOp {
	/** Operation name; each subclass overrides this with its own string. */
	public $type;

	/** Lines taken from the original sequence, or false when there are none. */
	public $orig;

	/** Lines taken from the closing sequence, or false when there are none. */
	public $closing;

	/**
	 * Build the operation that undoes this one (original and closing swapped).
	 *
	 * @return DiffOp
	 */
	abstract public function reverse();

	/**
	 * Number of lines this operation covers in the original sequence.
	 *
	 * @return int 0 when $orig is false or empty
	 */
	public function norig() {
		if ( !$this->orig ) {
			return 0;
		}
		return count( $this->orig );
	}

	/**
	 * Number of lines this operation covers in the closing sequence.
	 *
	 * @return int 0 when $closing is false or empty
	 */
	public function nclosing() {
		if ( !$this->closing ) {
			return 0;
		}
		return count( $this->closing );
	}
}
54
/**
 * Edit operation for a run of lines present in both sequences.
 *
 * @private
 * @ingroup DifferenceEngine
 */
class DiffOp_Copy extends DiffOp {
	public $type = 'copy';

	/**
	 * @param $orig array Lines as they appear in the original sequence.
	 * @param $closing array|bool Lines as they appear in the closing
	 *   sequence; anything that is not an array means "same as $orig".
	 */
	public function __construct( $orig, $closing = false ) {
		$this->orig = $orig;
		$this->closing = is_array( $closing ) ? $closing : $orig;
	}

	/**
	 * @return DiffOp_Copy Copy operation with both sides swapped.
	 */
	public function reverse() {
		return new self( $this->closing, $this->orig );
	}
}
78
/**
 * Edit operation for lines that exist only in the original sequence.
 *
 * @private
 * @ingroup DifferenceEngine
 */
class DiffOp_Delete extends DiffOp {
	public $type = 'delete';

	/**
	 * @param $lines array The removed lines.
	 */
	public function __construct( $lines ) {
		// A deletion has nothing on the closing side.
		$this->closing = false;
		$this->orig = $lines;
	}

	/**
	 * @return DiffOp_Add The addition that re-inserts the removed lines.
	 */
	public function reverse() {
		$removed = $this->orig;
		return new DiffOp_Add( $removed );
	}
}
99
/**
 * Edit operation for lines that exist only in the closing sequence.
 *
 * @private
 * @ingroup DifferenceEngine
 */
class DiffOp_Add extends DiffOp {
	public $type = 'add';

	/**
	 * @param $lines array The inserted lines.
	 */
	public function __construct( $lines ) {
		// An addition has nothing on the original side.
		$this->orig = false;
		$this->closing = $lines;
	}

	/**
	 * @return DiffOp_Delete The deletion that removes the inserted lines.
	 */
	public function reverse() {
		$inserted = $this->closing;
		return new DiffOp_Delete( $inserted );
	}
}
120
/**
 * Edit operation replacing a run of original lines with a run of
 * closing lines.
 *
 * @private
 * @ingroup DifferenceEngine
 */
class DiffOp_Change extends DiffOp {
	public $type = 'change';

	/**
	 * @param $orig array Lines being replaced (original sequence).
	 * @param $closing array Replacement lines (closing sequence).
	 */
	public function __construct( $orig, $closing ) {
		$this->closing = $closing;
		$this->orig = $orig;
	}

	/**
	 * @return DiffOp_Change Change operation with both sides swapped.
	 */
	public function reverse() {
		return new self( $this->closing, $this->orig );
	}
}
141
/**
 * Class used internally by Diff to actually compute the diffs.
 *
 * The algorithm used here is mostly lifted from the perl module
 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
 *
 * More ideas are taken from:
 * http://www.ics.uci.edu/~eppstein/161/960229.html
 *
 * Some ideas (and a bit of code) are from analyze.c, from GNU
 * diffutils-2.7, which can be found at:
 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
 *
 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
 * are my own.
 *
 * Line length limits for robustness added by Tim Starling, 2005-08-31
 * Alternative implementation added by Guy Van den Broeck, 2008-07-30
 *
 * Lines are expected to be strings. They are compared with strict
 * equality everywhere, so that numeric-looking lines such as "1" and
 * "1.0" are never mistaken for identical lines.
 *
 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
 * @private
 * @ingroup DifferenceEngine
 */
class DiffEngine {

	const MAX_XREF_LENGTH = 10000;

	// Per-line "changed" flags, indexed by line number in the from/to input.
	protected $xchanged, $ychanged;

	// Lines that occur in both inputs (candidates for the LCS) ...
	protected $xv = array(), $yv = array();
	// ... and the input line number each of those candidates came from.
	protected $xind = array(), $yind = array();

	// Working state shared by diag() and lcsPos().
	protected $seq = array(), $in_seq = array();

	protected $lcs = 0;

	/**
	 * Compute the list of edit operations turning $from_lines into $to_lines.
	 *
	 * @param $from_lines array Original lines (strings).
	 * @param $to_lines array Closing lines (strings).
	 * @return array List of DiffOp objects in input order.
	 */
	public function diff( $from_lines, $to_lines ) {
		wfProfileIn( __METHOD__ );

		// Diff and store locally
		$this->diffLocal( $from_lines, $to_lines );

		// Merge edits when possible
		$this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
		$this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );

		// Compute the edit operations.
		$n_from = count( $from_lines );
		$n_to = count( $to_lines );

		$edits = array();
		$xi = $yi = 0;
		while ( $xi < $n_from || $yi < $n_to ) {
			assert( $yi < $n_to || $this->xchanged[$xi] );
			assert( $xi < $n_from || $this->ychanged[$yi] );

			// Skip matching "snake".
			$copy = array();
			while ( $xi < $n_from && $yi < $n_to
				&& !$this->xchanged[$xi] && !$this->ychanged[$yi] ) {
				$copy[] = $from_lines[$xi++];
				++$yi;
			}
			if ( $copy ) {
				$edits[] = new DiffOp_Copy( $copy );
			}

			// Find deletes & adds.
			$delete = array();
			while ( $xi < $n_from && $this->xchanged[$xi] ) {
				$delete[] = $from_lines[$xi++];
			}

			$add = array();
			while ( $yi < $n_to && $this->ychanged[$yi] ) {
				$add[] = $to_lines[$yi++];
			}

			if ( $delete && $add ) {
				$edits[] = new DiffOp_Change( $delete, $add );
			} elseif ( $delete ) {
				$edits[] = new DiffOp_Delete( $delete );
			} elseif ( $add ) {
				$edits[] = new DiffOp_Add( $add );
			}
		}
		wfProfileOut( __METHOD__ );
		return $edits;
	}

	/**
	 * Fill $this->xchanged / $this->ychanged for the two inputs, either by
	 * delegating to WikiDiff3 (when $wgExternalDiffEngine is 'wikidiff3')
	 * or with the built-in LCS implementation.
	 *
	 * @param $from_lines array
	 * @param $to_lines array
	 */
	private function diffLocal( $from_lines, $to_lines ) {
		global $wgExternalDiffEngine;
		wfProfileIn( __METHOD__ );

		if ( $wgExternalDiffEngine === 'wikidiff3' ) {
			// wikidiff3
			$wikidiff3 = new WikiDiff3();
			$wikidiff3->diff( $from_lines, $to_lines );
			$this->xchanged = $wikidiff3->removed;
			$this->ychanged = $wikidiff3->added;
			unset( $wikidiff3 );
		} else {
			// old diff
			$n_from = count( $from_lines );
			$n_to = count( $to_lines );
			$this->xchanged = $this->ychanged = array();
			$this->xv = $this->yv = array();
			$this->xind = $this->yind = array();
			// Reset the LCS working state to its declared defaults.
			$this->seq = array();
			$this->in_seq = array();
			$this->lcs = 0;

			// Skip leading common lines.
			for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
				if ( $from_lines[$skip] !== $to_lines[$skip] ) {
					break;
				}
				$this->xchanged[$skip] = $this->ychanged[$skip] = false;
			}
			// Skip trailing common lines.
			$xi = $n_from;
			$yi = $n_to;
			for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
				if ( $from_lines[$xi] !== $to_lines[$yi] ) {
					break;
				}
				$this->xchanged[$xi] = $this->ychanged[$yi] = false;
			}

			// Ignore lines which do not exist in both files.
			$xhash = array();
			$yhash = array();
			for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
				$xhash[$this->lineHash( $from_lines[$xi] )] = 1;
			}

			for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
				$line = $to_lines[$yi];
				$hash = $this->lineHash( $line );
				if ( ( $this->ychanged[$yi] = empty( $xhash[$hash] ) ) ) {
					continue;
				}
				$yhash[$hash] = 1;
				$this->yv[] = $line;
				$this->yind[] = $yi;
			}
			for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
				$line = $from_lines[$xi];
				if ( ( $this->xchanged[$xi] = empty( $yhash[$this->lineHash( $line )] ) ) ) {
					continue;
				}
				$this->xv[] = $line;
				$this->xind[] = $xi;
			}

			// Find the LCS.
			$this->compareSeq( 0, count( $this->xv ), 0, count( $this->yv ) );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Returns the whole line if it's small enough, or the MD5 hash otherwise
	 * @param $line string
	 * @return string
	 */
	private function lineHash( $line ) {
		if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
			return md5( $line );
		}
		return $line;
	}

	/**
	 * Divide the Largest Common Subsequence (LCS) of the sequences
	 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
	 * sized segments.
	 *
	 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
	 * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
	 * sub sequences. The first sub-sequence is contained in [X0, X1),
	 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
	 * that (X0, Y0) == (XOFF, YOFF) and
	 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
	 *
	 * This function assumes that the first lines of the specified portions
	 * of the two files do not match, and likewise that the last lines do not
	 * match. The caller must trim matching lines from the beginning and end
	 * of the portions it is going to specify.
	 * @param $xoff
	 * @param $xlim
	 * @param $yoff
	 * @param $ylim
	 * @param $nchunks
	 * @return array
	 */
	private function diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
		$flip = false;

		if ( $xlim - $xoff > $ylim - $yoff ) {
			// Things seems faster (I'm not sure I understand why)
			// when the shortest sequence in X.
			$flip = true;
			list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
		}

		// Map each line of the "Y" side to the (descending) list of
		// positions at which it occurs.
		$ymatches = array();
		if ( $flip ) {
			for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
				$ymatches[$this->xv[$i]][] = $i;
			}
		} else {
			for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
				$ymatches[$this->yv[$i]][] = $i;
			}
		}

		$this->lcs = 0;
		$this->seq[0] = $yoff - 1;
		$this->in_seq = array();
		$ymids[0] = array();

		$numer = $xlim - $xoff + $nchunks - 1;
		$x = $xoff;
		for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
			if ( $chunk > 0 ) {
				for ( $i = 0; $i <= $this->lcs; $i++ ) {
					$ymids[$i][$chunk - 1] = $this->seq[$i];
				}
			}

			$x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $chunk ) / $nchunks );
			for ( ; $x < $x1; $x++ ) {
				$line = $flip ? $this->yv[$x] : $this->xv[$x];
				if ( empty( $ymatches[$line] ) ) {
					continue;
				}
				$matches = $ymatches[$line];
				$nmatches = count( $matches );
				// $m is a cursor shared by both loops below: the second
				// loop resumes right after the element the first stopped on.
				$m = 0;
				while ( $m < $nmatches ) {
					$y = $matches[$m++];
					if ( empty( $this->in_seq[$y] ) ) {
						$k = $this->lcsPos( $y );
						assert( $k > 0 );
						$ymids[$k] = $ymids[$k - 1];
						break;
					}
				}
				while ( $m < $nmatches ) {
					$y = $matches[$m++];
					if ( $y > $this->seq[$k - 1] ) {
						assert( $y < $this->seq[$k] );
						// Optimization: this is a common case:
						// next match is just replacing previous match.
						$this->in_seq[$this->seq[$k]] = false;
						$this->seq[$k] = $y;
						$this->in_seq[$y] = 1;
					} elseif ( empty( $this->in_seq[$y] ) ) {
						$k = $this->lcsPos( $y );
						assert( $k > 0 );
						$ymids[$k] = $ymids[$k - 1];
					}
				}
			}
		}

		$seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
		$ymid = $ymids[$this->lcs];
		for ( $n = 0; $n < $nchunks - 1; $n++ ) {
			$x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
			$y1 = $ymid[$n] + 1;
			$seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
		}
		$seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );

		return array( $this->lcs, $seps );
	}

	/**
	 * Insert $ypos into the sorted $this->seq working array, either by
	 * appending it (growing $this->lcs) or by replacing the first entry
	 * that is not smaller than it.
	 *
	 * @param $ypos
	 * @return int Index in $this->seq at which $ypos was stored.
	 */
	private function lcsPos( $ypos ) {
		$end = $this->lcs;
		if ( $end == 0 || $ypos > $this->seq[$end] ) {
			$this->seq[++$this->lcs] = $ypos;
			$this->in_seq[$ypos] = 1;
			return $this->lcs;
		}

		// Binary search for the first entry >= $ypos.
		$beg = 1;
		while ( $beg < $end ) {
			$mid = (int)( ( $beg + $end ) / 2 );
			if ( $ypos > $this->seq[$mid] ) {
				$beg = $mid + 1;
			} else {
				$end = $mid;
			}
		}

		assert( $ypos != $this->seq[$end] );

		$this->in_seq[$this->seq[$end]] = false;
		$this->seq[$end] = $ypos;
		$this->in_seq[$ypos] = 1;
		return $end;
	}

	/**
	 * Find LCS of two sequences.
	 *
	 * The results are recorded in the vectors $this->{x,y}changed[], by
	 * storing a 1 in the element for each line that is an insertion
	 * or deletion (ie. is not in the LCS).
	 *
	 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
	 *
	 * Note that XLIM, YLIM are exclusive bounds.
	 * All line numbers are origin-0 and discarded lines are not counted.
	 * @param $xoff
	 * @param $xlim
	 * @param $yoff
	 * @param $ylim
	 */
	private function compareSeq( $xoff, $xlim, $yoff, $ylim ) {
		// Slide down the bottom initial diagonal.
		// Strict comparison: with ==, PHP compares numeric strings by
		// value, so "1" and "1.0" would wrongly count as the same line.
		while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] === $this->yv[$yoff] ) {
			++$xoff;
			++$yoff;
		}

		// Slide up the top initial diagonal.
		while ( $xlim > $xoff && $ylim > $yoff
			&& $this->xv[$xlim - 1] === $this->yv[$ylim - 1] ) {
			--$xlim;
			--$ylim;
		}

		if ( $xoff == $xlim || $yoff == $ylim ) {
			$lcs = 0;
		} else {
			// This is ad hoc but seems to work well.
			// $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
			// $nchunks = max(2,min(8,(int)$nchunks));
			$nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
			list( $lcs, $seps ) = $this->diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
		}

		if ( $lcs == 0 ) {
			// X and Y sequences have no common subsequence:
			// mark all changed.
			while ( $yoff < $ylim ) {
				$this->ychanged[$this->yind[$yoff++]] = 1;
			}
			while ( $xoff < $xlim ) {
				$this->xchanged[$this->xind[$xoff++]] = 1;
			}
		} else {
			// Use the partitions to split this problem into subproblems.
			$nseps = count( $seps );
			$pt1 = $seps[0];
			for ( $n = 1; $n < $nseps; $n++ ) {
				$pt2 = $seps[$n];
				$this->compareSeq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
				$pt1 = $pt2;
			}
		}
	}

	/**
	 * Adjust inserts/deletes of identical lines to join changes
	 * as much as possible.
	 *
	 * We do something when a run of changed lines include a
	 * line at one end and has an excluded, identical line at the other.
	 * We are free to choose which identical line is included.
	 * `compareseq' usually chooses the one at the beginning,
	 * but usually it is cleaner to consider the following identical line
	 * to be the "change".
	 *
	 * This is extracted from analyze.c (GNU diffutils-2.7).
	 *
	 * @param $lines array Lines of the file whose flags are adjusted.
	 * @param &$changed array Change flags for $lines (modified in place).
	 * @param $other_changed array Change flags of the other file.
	 */
	private function shiftBoundaries( $lines, &$changed, $other_changed ) {
		wfProfileIn( __METHOD__ );
		$i = 0;
		$j = 0;

		assert( count( $lines ) == count( $changed ) );
		$len = count( $lines );
		$other_len = count( $other_changed );

		while ( 1 ) {
			/*
			 * Scan forwards to find beginning of another run of changes.
			 * Also keep track of the corresponding point in the other file.
			 *
			 * Throughout this code, $i and $j are adjusted together so that
			 * the first $i elements of $changed and the first $j elements
			 * of $other_changed both contain the same number of zeros
			 * (unchanged lines).
			 * Furthermore, $j is always kept so that $j == $other_len or
			 * $other_changed[$j] == false.
			 */
			while ( $j < $other_len && $other_changed[$j] ) {
				$j++;
			}

			while ( $i < $len && !$changed[$i] ) {
				assert( $j < $other_len && !$other_changed[$j] );
				$i++;
				$j++;
				while ( $j < $other_len && $other_changed[$j] ) {
					$j++;
				}
			}

			if ( $i == $len ) {
				break;
			}

			$start = $i;

			// Find the end of this run of changes.
			while ( ++$i < $len && $changed[$i] ) {
				continue;
			}

			do {
				/*
				 * Record the length of this run of changes, so that
				 * we can later determine whether the run has grown.
				 */
				$runlength = $i - $start;

				/*
				 * Move the changed region back, so long as the
				 * previous unchanged line matches the last changed one.
				 * This merges with previous changed regions.
				 * (Strict comparison: only truly identical lines may be
				 * swapped between the changed and unchanged sets.)
				 */
				while ( $start > 0 && $lines[$start - 1] === $lines[$i - 1] ) {
					$changed[--$start] = 1;
					$changed[--$i] = false;
					while ( $start > 0 && $changed[$start - 1] ) {
						$start--;
					}
					assert( $j > 0 );
					while ( $other_changed[--$j] ) {
						continue;
					}
					assert( $j >= 0 && !$other_changed[$j] );
				}

				/*
				 * Set CORRESPONDING to the end of the changed run, at the last
				 * point where it corresponds to a changed run in the other file.
				 * CORRESPONDING == LEN means no such point has been found.
				 */
				$corresponding = $j < $other_len ? $i : $len;

				/*
				 * Move the changed region forward, so long as the
				 * first changed line matches the following unchanged one.
				 * This merges with following changed regions.
				 * Do this second, so that if there are no merges,
				 * the changed region is moved forward as far as possible.
				 */
				while ( $i < $len && $lines[$start] === $lines[$i] ) {
					$changed[$start++] = false;
					$changed[$i++] = 1;
					while ( $i < $len && $changed[$i] ) {
						$i++;
					}

					assert( $j < $other_len && !$other_changed[$j] );
					$j++;
					if ( $j < $other_len && $other_changed[$j] ) {
						$corresponding = $i;
						while ( $j < $other_len && $other_changed[$j] ) {
							$j++;
						}
					}
				}
			} while ( $runlength != $i - $start );

			/*
			 * If possible, move the fully-merged run of changes
			 * back to a corresponding run in the other file.
			 */
			while ( $corresponding < $i ) {
				$changed[--$start] = 1;
				$changed[--$i] = 0;
				assert( $j > 0 );
				while ( $other_changed[--$j] ) {
					continue;
				}
				assert( $j >= 0 && !$other_changed[$j] );
			}
		}
		wfProfileOut( __METHOD__ );
	}
}
647
/**
 * Class representing a 'diff' between two sequences of strings.
 *
 * The diff is stored as an ordered list of DiffOp objects in $edits.
 *
 * @private
 * @ingroup DifferenceEngine
 */
class Diff {
	/** List of DiffOp objects, in input order. */
	public $edits;

	/**
	 * Constructor.
	 * Computes diff between sequences of strings.
	 *
	 * @param $from_lines array An array of strings.
	 *   (Typically these are lines from a file.)
	 * @param $to_lines array An array of strings.
	 */
	public function __construct( $from_lines, $to_lines ) {
		$eng = new DiffEngine;
		$this->edits = $eng->diff( $from_lines, $to_lines );
		// $this->check($from_lines, $to_lines);
	}

	/**
	 * Compute reversed Diff.
	 *
	 * SYNOPSIS:
	 *
	 *   $diff = new Diff($lines1, $lines2);
	 *   $rev = $diff->reverse();
	 *
	 * The receiver is left untouched; the result is a separate object.
	 *
	 * @return Object A Diff object representing the inverse of the
	 *   original diff.
	 */
	public function reverse() {
		// Objects are handles: a plain "$rev = $this" would alias this very
		// object, so emptying $rev->edits would wipe our own edit list
		// before it is iterated. Work on a clone instead.
		$rev = clone $this;
		$rev->edits = array();
		/** @var DiffOp $edit */
		foreach ( $this->edits as $edit ) {
			$rev->edits[] = $edit->reverse();
		}
		return $rev;
	}

	/**
	 * Check for empty diff.
	 *
	 * @return bool True if two sequences were identical.
	 */
	public function isEmpty() {
		foreach ( $this->edits as $edit ) {
			if ( $edit->type != 'copy' ) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Compute the length of the Longest Common Subsequence (LCS).
	 *
	 * This is mostly for diagnostic purposes.
	 *
	 * @return int The length of the LCS.
	 */
	public function lcs() {
		$lcs = 0;
		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$lcs += count( $edit->orig );
			}
		}
		return $lcs;
	}

	/**
	 * Get the original set of lines.
	 *
	 * This reconstructs the $from_lines parameter passed to the
	 * constructor.
	 *
	 * @return array The original sequence of strings.
	 */
	public function orig() {
		$lines = array();

		foreach ( $this->edits as $edit ) {
			if ( $edit->orig ) {
				// Append this edit's original lines to the result.
				array_splice( $lines, count( $lines ), 0, $edit->orig );
			}
		}
		return $lines;
	}

	/**
	 * Get the closing set of lines.
	 *
	 * This reconstructs the $to_lines parameter passed to the
	 * constructor.
	 *
	 * @return array The sequence of strings.
	 */
	public function closing() {
		$lines = array();

		foreach ( $this->edits as $edit ) {
			if ( $edit->closing ) {
				// Append this edit's closing lines to the result.
				array_splice( $lines, count( $lines ), 0, $edit->closing );
			}
		}
		return $lines;
	}

	/**
	 * Check a Diff for validity.
	 *
	 * This is here only for debugging purposes.
	 * @param $from_lines
	 * @param $to_lines
	 */
	private function check( $from_lines, $to_lines ) {
		wfProfileIn( __METHOD__ );
		if ( serialize( $from_lines ) != serialize( $this->orig() ) ) {
			trigger_error( "Reconstructed original doesn't match", E_USER_ERROR );
		}
		if ( serialize( $to_lines ) != serialize( $this->closing() ) ) {
			trigger_error( "Reconstructed closing doesn't match", E_USER_ERROR );
		}

		$rev = $this->reverse();
		if ( serialize( $to_lines ) != serialize( $rev->orig() ) ) {
			trigger_error( "Reversed original doesn't match", E_USER_ERROR );
		}
		if ( serialize( $from_lines ) != serialize( $rev->closing() ) ) {
			trigger_error( "Reversed closing doesn't match", E_USER_ERROR );
		}

		// Two consecutive edits of the same type could have been merged.
		$prevtype = 'none';
		foreach ( $this->edits as $edit ) {
			if ( $prevtype == $edit->type ) {
				trigger_error( 'Edit sequence is non-optimal', E_USER_ERROR );
			}
			$prevtype = $edit->type;
		}

		$lcs = $this->lcs();
		trigger_error( 'Diff okay: LCS = ' . $lcs, E_USER_NOTICE );
		wfProfileOut( __METHOD__ );
	}
}
797
/**
 * Diff computed on one pair of sequences ("mapped" lines) but reported
 * in terms of another, parallel pair (the real lines).
 *
 * @todo bad name.
 * @private
 * @ingroup DifferenceEngine
 */
class MappedDiff extends Diff {
	/**
	 * Constructor.
	 *
	 * Computes diff between sequences of strings.
	 *
	 * This can be used to compute things like
	 * case-insensitive diffs, or diffs which ignore
	 * changes in white-space.
	 *
	 * @param $from_lines array An array of strings.
	 *   (Typically these are lines from a file.)
	 *
	 * @param $to_lines array An array of strings.
	 *
	 * @param $mapped_from_lines array This array should
	 *   have the same number of elements as $from_lines.
	 *   The elements in $mapped_from_lines and
	 *   $mapped_to_lines are what is actually compared
	 *   when computing the diff.
	 *
	 * @param $mapped_to_lines array This array should
	 *   have the same number of elements as $to_lines.
	 */
	public function __construct( $from_lines, $to_lines,
		$mapped_from_lines, $mapped_to_lines ) {
		wfProfileIn( __METHOD__ );

		assert( count( $from_lines ) == count( $mapped_from_lines ) );
		assert( count( $to_lines ) == count( $mapped_to_lines ) );

		parent::__construct( $mapped_from_lines, $mapped_to_lines );

		// Walk the edits in order and swap the mapped lines each one holds
		// for the equally long run of real lines at the same position.
		$xi = $yi = 0;
		foreach ( $this->edits as $edit ) {
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $from_lines, $xi, count( $edit->orig ) );
				$xi += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $to_lines, $yi, count( $edit->closing ) );
				$yi += count( $edit->closing );
			}
		}
		wfProfileOut( __METHOD__ );
	}
}
853
854 /**
855 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
856 */
857
/**
 * Collects words into HTML-escaped output lines, wrapping consecutive
 * words that share the 'ins' or 'del' tag in a matching <ins>/<del>
 * element.
 *
 * @private
 * @ingroup DifferenceEngine
 */
class HWLDF_WordAccumulator {
	/** Finished output lines. */
	private $lines = array();
	/** HTML of the line currently being built. */
	private $line = '';
	/** Raw (unescaped) text of the current run of same-tag words. */
	private $group = '';
	/** Tag of the current run: '', 'ins' or 'del'. */
	private $tag = '';

	/**
	 * Append the pending run of words to the current line (escaped and
	 * wrapped according to the current tag), then switch to $new_tag.
	 *
	 * @param $new_tag string
	 */
	private function flushGroup( $new_tag ) {
		if ( $this->group !== '' ) {
			$escaped = htmlspecialchars( $this->group );
			if ( $this->tag === 'ins' ) {
				$this->line .= '<ins class="diffchange diffchange-inline">' .
					$escaped . '</ins>';
			} elseif ( $this->tag === 'del' ) {
				$this->line .= '<del class="diffchange diffchange-inline">' .
					$escaped . '</del>';
			} else {
				$this->line .= $escaped;
			}
		}
		$this->group = '';
		$this->tag = $new_tag;
	}

	/**
	 * Finish the current line and start a new, empty one.
	 *
	 * @param $new_tag string
	 */
	private function flushLine( $new_tag ) {
		$this->flushGroup( $new_tag );
		if ( $this->line != '' ) {
			array_push( $this->lines, $this->line );
		} else {
			# make empty lines visible by inserting an NBSP
			array_push( $this->lines, '&#160;' );
		}
		$this->line = '';
	}

	/**
	 * Add a list of words carrying the given tag.
	 *
	 * A word starting with "\n" ends the current output line; the rest of
	 * that word is then added to the new line.
	 *
	 * @param $words array
	 * @param $tag string '' (plain), 'ins' or 'del'
	 */
	public function addWords( $words, $tag = '' ) {
		if ( $tag != $this->tag ) {
			$this->flushGroup( $tag );
		}

		foreach ( $words as $word ) {
			// new-line should only come as first char of word.
			if ( $word == '' ) {
				continue;
			}
			if ( $word[0] == "\n" ) {
				$this->flushLine( $tag );
				// substr() yields false instead of '' before PHP 8 when
				// nothing is left; the cast keeps $word a string.
				$word = (string)substr( $word, 1 );
			}
			assert( strpos( $word, "\n" ) === false );
			$this->group .= $word;
		}
	}

	/**
	 * Flush whatever is pending and return all accumulated lines.
	 *
	 * @return array
	 */
	public function getLines() {
		$this->flushLine( '~done' );
		return $this->lines;
	}
}
933
/**
 * Word-by-word diff of two sets of lines.
 *
 * The lines are split into words; the diff is computed on the words with
 * their trailing whitespace stripped and reported on the full words.
 *
 * @private
 * @ingroup DifferenceEngine
 */
class WordLevelDiff extends MappedDiff {
	const MAX_LINE_LENGTH = 10000;

	/**
	 * @param $orig_lines array
	 * @param $closing_lines array
	 */
	public function __construct( $orig_lines, $closing_lines ) {
		wfProfileIn( __METHOD__ );

		$origParts = $this->split( $orig_lines );
		$closingParts = $this->split( $closing_lines );

		// Index 0 holds the full words, index 1 the stripped ones.
		parent::__construct( $origParts[0], $closingParts[0],
			$origParts[1], $closingParts[1] );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Split lines into words.
	 *
	 * @param $lines array
	 * @return array Two parallel lists: the full words and the stripped
	 *   words. Each line after the first is preceded by a "\n" entry in
	 *   both lists.
	 */
	private function split( $lines ) {
		wfProfileIn( __METHOD__ );

		$words = array();
		$stripped = array();
		$isFirstLine = true;
		foreach ( $lines as $line ) {
			if ( !$isFirstLine ) {
				$words[] = "\n";
				$stripped[] = "\n";
			}
			$isFirstLine = false;

			# If the line is too long, just pretend the entire line is one big word
			# This prevents resource exhaustion problems
			if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
				$words[] = $line;
				$stripped[] = $line;
				continue;
			}

			// NOTE(review): "(?!< \n)" parses as a negative lookahead for
			// "<\n"; a lookbehind "(?<! \n)" may have been intended.
			// Confirm before changing the pattern.
			$m = array();
			$found = preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m );
			if ( $found ) {
				// $m[0]: whole matches; $m[1]: the word without the
				// optional trailing whitespace character.
				$words = array_merge( $words, $m[0] );
				$stripped = array_merge( $stripped, $m[1] );
			}
		}
		wfProfileOut( __METHOD__ );
		return array( $words, $stripped );
	}

	/**
	 * Render the original text, with removed words tagged 'del'.
	 *
	 * @return array Lines as produced by HWLDF_WordAccumulator.
	 */
	public function orig() {
		wfProfileIn( __METHOD__ );
		$accumulator = new HWLDF_WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$accumulator->addWords( $edit->orig );
				continue;
			}
			if ( $edit->orig ) {
				$accumulator->addWords( $edit->orig, 'del' );
			}
		}
		$rendered = $accumulator->getLines();
		wfProfileOut( __METHOD__ );
		return $rendered;
	}

	/**
	 * Render the closing text, with added words tagged 'ins'.
	 *
	 * @return array Lines as produced by HWLDF_WordAccumulator.
	 */
	public function closing() {
		wfProfileIn( __METHOD__ );
		$accumulator = new HWLDF_WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$accumulator->addWords( $edit->closing );
				continue;
			}
			if ( $edit->closing ) {
				$accumulator->addWords( $edit->closing, 'ins' );
			}
		}
		$rendered = $accumulator->getLines();
		wfProfileOut( __METHOD__ );
		return $rendered;
	}
}