Remove unused stuff from diffs
[lhc/web/wiklou.git] / includes / diff / DairikiDiff.php
1 <?php
2 /**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6 * You may copy this code freely under the conditions of the GPL.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup DifferenceEngine
25 * @defgroup DifferenceEngine DifferenceEngine
26 */
27
28 /**
29 * The base class for all other DiffOp classes.
30 *
31 * The classes that extend DiffOp are: DiffOpCopy, DiffOpDelete, DiffOpAdd and
32 * DiffOpChange. FakeDiffOp also extends DiffOp, but it is not located in this file.
33 *
34 * @private
35 * @ingroup DifferenceEngine
36 */
abstract class DiffOp {

	/**
	 * Kind of operation. The subclasses in this file set it to
	 * 'copy', 'delete', 'add' or 'change'.
	 *
	 * @var string
	 */
	public $type;

	/**
	 * Lines taken from the original sequence. DiffOpAdd stores false here
	 * because an addition has no original lines.
	 *
	 * @var string[]|false
	 */
	public $orig;

	/**
	 * Lines taken from the closing sequence. DiffOpDelete stores false here
	 * because a deletion has no closing lines.
	 *
	 * @var string[]|false
	 */
	public $closing;

	/**
	 * @return string The value of $type.
	 */
	public function getType() {
		return $this->type;
	}

	/**
	 * @return string[]|false The value of $orig.
	 */
	public function getOrig() {
		return $this->orig;
	}

	/**
	 * Get all closing lines, or a single one of them.
	 *
	 * @param int|null $i Index of the wanted line, or null to get
	 *  the whole $closing value.
	 * @return string[]|string|false|null The whole $closing value when $i is null;
	 *  otherwise the line stored at index $i, or null when there is no such line.
	 */
	public function getClosing( $i = null ) {
		if ( $i === null ) {
			return $this->closing;
		}
		// $closing is false for operations without closing lines, and
		// array_key_exists() refuses non-array arguments, so check the type first.
		if ( is_array( $this->closing ) && array_key_exists( $i, $this->closing ) ) {
			return $this->closing[$i];
		}
		return null;
	}

	/**
	 * Build the operation that undoes this one.
	 */
	abstract public function reverse();

	/**
	 * @return int Number of original lines; 0 when $orig is empty or false.
	 */
	public function norig() {
		return $this->orig ? count( $this->orig ) : 0;
	}

	/**
	 * @return int Number of closing lines; 0 when $closing is empty or false.
	 */
	public function nclosing() {
		return $this->closing ? count( $this->closing ) : 0;
	}
}
98
99 /**
100 * Extends DiffOp. Used to mark strings that have been
101 * copied from one string array to the other.
102 *
103 * @private
104 * @ingroup DifferenceEngine
105 */
class DiffOpCopy extends DiffOp {
	public $type = 'copy';

	/**
	 * @param string[] $orig Lines as they appear in the original sequence.
	 * @param string[]|bool $closing Lines as they appear in the closing
	 *  sequence. Any non-array value means "use $orig for both sides".
	 */
	public function __construct( $orig, $closing = false ) {
		$this->orig = $orig;
		$this->closing = is_array( $closing ) ? $closing : $orig;
	}

	/**
	 * Build the same copy with the two sides swapped.
	 *
	 * @return DiffOpCopy
	 */
	public function reverse() {
		$closingSide = $this->closing;
		$origSide = $this->orig;

		return new DiffOpCopy( $closingSide, $origSide );
	}
}
124
125 /**
126 * Extends DiffOp. Used to mark strings that have been
127 * deleted from the first string array.
128 *
129 * @private
130 * @ingroup DifferenceEngine
131 */
class DiffOpDelete extends DiffOp {
	public $type = 'delete';

	/**
	 * @param string[] $lines Lines that exist only in the original sequence.
	 */
	public function __construct( $lines ) {
		// A deletion carries nothing on the closing side.
		$this->closing = false;
		$this->orig = $lines;
	}

	/**
	 * Undoing a deletion means adding the same lines back.
	 *
	 * @return DiffOpAdd
	 */
	public function reverse() {
		$removedLines = $this->orig;

		return new DiffOpAdd( $removedLines );
	}
}
147
148 /**
149 * Extends DiffOp. Used to mark strings that have been
150 * added in the second string array (they do not occur in the first one).
151 *
152 * @private
153 * @ingroup DifferenceEngine
154 */
class DiffOpAdd extends DiffOp {
	public $type = 'add';

	/**
	 * @param string[] $lines Lines that exist only in the closing sequence.
	 */
	public function __construct( $lines ) {
		// An addition carries nothing on the original side.
		$this->orig = false;
		$this->closing = $lines;
	}

	/**
	 * Undoing an addition means deleting the same lines again.
	 *
	 * @return DiffOpDelete
	 */
	public function reverse() {
		$insertedLines = $this->closing;

		return new DiffOpDelete( $insertedLines );
	}
}
170
171 /**
172 * Extends DiffOp. Used to mark strings that have been
173 * changed from the first string array (both added and subtracted).
174 *
175 * @private
176 * @ingroup DifferenceEngine
177 */
class DiffOpChange extends DiffOp {
	public $type = 'change';

	/**
	 * @param string[] $orig Lines being replaced (original side).
	 * @param string[] $closing Lines replacing them (closing side).
	 */
	public function __construct( $orig, $closing ) {
		$this->closing = $closing;
		$this->orig = $orig;
	}

	/**
	 * Build the same change with the two sides swapped.
	 *
	 * @return DiffOpChange
	 */
	public function reverse() {
		$before = $this->orig;
		$after = $this->closing;

		return new DiffOpChange( $after, $before );
	}
}
193
194 /**
195 * Class used internally by Diff to actually compute the diffs.
196 *
197 * The algorithm used here is mostly lifted from the perl module
198 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
199 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
200 *
201 * More ideas are taken from:
202 * http://www.ics.uci.edu/~eppstein/161/960229.html
203 *
204 * Some ideas (and a bit of code) are from analyze.c, from GNU
205 * diffutils-2.7, which can be found at:
206 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
207 *
208 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
209 * are my own.
210 *
211 * Line length limits for robustness added by Tim Starling, 2005-08-31
212 * Alternative implementation added by Guy Van den Broeck, 2008-07-30
213 *
214 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
215 * @private
216 * @ingroup DifferenceEngine
217 */
class DiffEngine {
	/**
	 * Per-line change flags for the original ($xchanged) and the closing
	 * ($ychanged) sequence. Filled by diffLocal(), then adjusted in place
	 * by shiftBoundaries(). A truthy entry means "this line is part of a change".
	 */
	protected $xchanged, $ychanged;

	/**
	 * Compute the list of edit operations that turns $from_lines into $to_lines.
	 *
	 * @param string[] $from_lines
	 * @param string[] $to_lines
	 *
	 * @return DiffOp[]
	 */
	public function diff( $from_lines, $to_lines ) {
		// Diff and store locally
		$this->diffLocal( $from_lines, $to_lines );

		// Merge edits when possible
		$this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
		$this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );

		// Compute the edit operations.
		$n_from = count( $from_lines );
		$n_to = count( $to_lines );

		$edits = [];
		$xi = $yi = 0;
		while ( $xi < $n_from || $yi < $n_to ) {
			// Once one side is exhausted, everything left on the other side
			// has to be flagged as changed.
			assert( $yi < $n_to || $this->xchanged[$xi] );
			assert( $xi < $n_from || $this->ychanged[$yi] );

			// Skip matching "snake": lines that are unchanged on both sides.
			$copy = [];
			while ( $xi < $n_from && $yi < $n_to
				&& !$this->xchanged[$xi] && !$this->ychanged[$yi]
			) {
				$copy[] = $from_lines[$xi++];
				++$yi;
			}
			if ( $copy ) {
				$edits[] = new DiffOpCopy( $copy );
			}

			// Find deletes & adds.
			$delete = [];
			while ( $xi < $n_from && $this->xchanged[$xi] ) {
				$delete[] = $from_lines[$xi++];
			}

			$add = [];
			while ( $yi < $n_to && $this->ychanged[$yi] ) {
				$add[] = $to_lines[$yi++];
			}

			// Adjacent deleted and added runs are reported as one change.
			if ( $delete && $add ) {
				$edits[] = new DiffOpChange( $delete, $add );
			} elseif ( $delete ) {
				$edits[] = new DiffOpDelete( $delete );
			} elseif ( $add ) {
				$edits[] = new DiffOpAdd( $add );
			}
		}

		return $edits;
	}

	/**
	 * Run WikiDiff3 on the two sequences and keep its $removed / $added
	 * results as the change flags of the original / closing side.
	 *
	 * @param string[] $from_lines
	 * @param string[] $to_lines
	 */
	private function diffLocal( $from_lines, $to_lines ) {
		$wikidiff3 = new WikiDiff3();
		$wikidiff3->diff( $from_lines, $to_lines );
		$this->xchanged = $wikidiff3->removed;
		$this->ychanged = $wikidiff3->added;
	}

	/**
	 * Adjust inserts/deletes of identical lines to join changes
	 * as much as possible.
	 *
	 * We do something when a run of changed lines include a
	 * line at one end and has an excluded, identical line at the other.
	 * We are free to choose which identical line is included.
	 * `compareseq' usually chooses the one at the beginning,
	 * but usually it is cleaner to consider the following identical line
	 * to be the "change".
	 *
	 * This follows analyze.c (GNU diffutils-2.7). Lines are compared with
	 * strict identity (===): with loose comparison PHP treats distinct
	 * numeric-looking strings such as '100' and '1e2' as equal, which would
	 * move a change across lines that are not actually the same.
	 *
	 * @param string[] $lines The sequence whose change flags are adjusted.
	 * @param array &$changed Change flags of $lines, one per line; modified in place.
	 * @param array $other_changed Change flags of the other sequence; read only.
	 */
	private function shiftBoundaries( $lines, &$changed, $other_changed ) {
		$i = 0;
		$j = 0;

		assert( count( $lines ) == count( $changed ) );
		$len = count( $lines );
		$other_len = count( $other_changed );

		while ( 1 ) {
			/*
			 * Scan forwards to find beginning of another run of changes.
			 * Also keep track of the corresponding point in the other file.
			 *
			 * Throughout this code, $i and $j are adjusted together so that
			 * the first $i elements of $changed and the first $j elements
			 * of $other_changed both contain the same number of zeros
			 * (unchanged lines).
			 * Furthermore, $j is always kept so that $j == $other_len or
			 * $other_changed[$j] == false.
			 */
			while ( $j < $other_len && $other_changed[$j] ) {
				$j++;
			}

			while ( $i < $len && !$changed[$i] ) {
				assert( $j < $other_len && ! $other_changed[$j] );
				$i++;
				$j++;
				while ( $j < $other_len && $other_changed[$j] ) {
					$j++;
				}
			}

			if ( $i == $len ) {
				break;
			}

			$start = $i;

			// Find the end of this run of changes.
			while ( ++$i < $len && $changed[$i] ) {
				continue;
			}

			do {
				/*
				 * Record the length of this run of changes, so that
				 * we can later determine whether the run has grown.
				 */
				$runlength = $i - $start;

				/*
				 * Move the changed region back, so long as the
				 * previous unchanged line matches the last changed one.
				 * This merges with previous changed regions.
				 */
				while ( $start > 0 && $lines[$start - 1] === $lines[$i - 1] ) {
					$changed[--$start] = 1;
					$changed[--$i] = false;
					while ( $start > 0 && $changed[$start - 1] ) {
						$start--;
					}
					assert( $j > 0 );
					while ( $other_changed[--$j] ) {
						continue;
					}
					assert( $j >= 0 && !$other_changed[$j] );
				}

				/*
				 * Set CORRESPONDING to the end of the changed run, at the last
				 * point where it corresponds to a changed run in the other file.
				 * CORRESPONDING == LEN means no such point has been found.
				 */
				$corresponding = $j < $other_len ? $i : $len;

				/*
				 * Move the changed region forward, so long as the
				 * first changed line matches the following unchanged one.
				 * This merges with following changed regions.
				 * Do this second, so that if there are no merges,
				 * the changed region is moved forward as far as possible.
				 */
				while ( $i < $len && $lines[$start] === $lines[$i] ) {
					$changed[$start++] = false;
					$changed[$i++] = 1;
					while ( $i < $len && $changed[$i] ) {
						$i++;
					}

					assert( $j < $other_len && ! $other_changed[$j] );
					$j++;
					if ( $j < $other_len && $other_changed[$j] ) {
						$corresponding = $i;
						while ( $j < $other_len && $other_changed[$j] ) {
							$j++;
						}
					}
				}
			} while ( $runlength != $i - $start );

			/*
			 * If possible, move the fully-merged run of changes
			 * back to a corresponding run in the other file.
			 */
			while ( $corresponding < $i ) {
				$changed[--$start] = 1;
				$changed[--$i] = 0;
				assert( $j > 0 );
				while ( $other_changed[--$j] ) {
					continue;
				}
				assert( $j >= 0 && !$other_changed[$j] );
			}
		}
	}
}
422
423 /**
424 * Class representing a 'diff' between two sequences of strings.
425 * @todo document
426 * @private
427 * @ingroup DifferenceEngine
428 */
class Diff {

	/**
	 * The edit operations making up this diff, in sequence order.
	 *
	 * @var DiffOp[]
	 */
	public $edits;

	/**
	 * Constructor.
	 * Computes diff between sequences of strings.
	 *
	 * @param string[] $from_lines An array of strings.
	 *   Typically these are lines from a file.
	 * @param string[] $to_lines An array of strings.
	 */
	public function __construct( $from_lines, $to_lines ) {
		$eng = new DiffEngine;
		$this->edits = $eng->diff( $from_lines, $to_lines );
	}

	/**
	 * @return DiffOp[]
	 */
	public function getEdits() {
		return $this->edits;
	}

	/**
	 * Compute reversed Diff.
	 *
	 * SYNOPSIS:
	 *
	 *    $diff = new Diff($lines1, $lines2);
	 *    $rev = $diff->reverse();
	 *
	 * The original diff is left untouched.
	 *
	 * @return Diff A Diff object representing the inverse of the
	 *   original diff.
	 */
	public function reverse() {
		// Object variables are handles, so a plain assignment would make $rev
		// the very same object as $this; resetting its edits would then empty
		// the list before it is iterated. Work on a copy instead.
		$rev = clone $this;
		$rev->edits = [];
		/** @var DiffOp $edit */
		foreach ( $this->edits as $edit ) {
			$rev->edits[] = $edit->reverse();
		}

		return $rev;
	}

	/**
	 * Check for empty diff.
	 *
	 * @return bool True if two sequences were identical.
	 */
	public function isEmpty() {
		foreach ( $this->edits as $edit ) {
			if ( $edit->type != 'copy' ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Compute the length of the Longest Common Subsequence (LCS).
	 *
	 * This is mostly for diagnostic purposes.
	 *
	 * @return int The length of the LCS.
	 */
	public function lcs() {
		$lcs = 0;
		foreach ( $this->edits as $edit ) {
			// Only copied lines are common to both sequences.
			if ( $edit->type == 'copy' ) {
				$lcs += count( $edit->orig );
			}
		}

		return $lcs;
	}

	/**
	 * Get the original set of lines.
	 *
	 * This reconstructs the $from_lines parameter passed to the
	 * constructor.
	 *
	 * @return string[] The original sequence of strings.
	 */
	public function orig() {
		$lines = [];

		foreach ( $this->edits as $edit ) {
			// Operations without original lines hold false (or an empty array) here.
			if ( $edit->orig ) {
				// Append the operation's lines at the end of $lines.
				array_splice( $lines, count( $lines ), 0, $edit->orig );
			}
		}

		return $lines;
	}

	/**
	 * Get the closing set of lines.
	 *
	 * This reconstructs the $to_lines parameter passed to the
	 * constructor.
	 *
	 * @return string[] The sequence of strings.
	 */
	public function closing() {
		$lines = [];

		foreach ( $this->edits as $edit ) {
			// Operations without closing lines hold false (or an empty array) here.
			if ( $edit->closing ) {
				// Append the operation's lines at the end of $lines.
				array_splice( $lines, count( $lines ), 0, $edit->closing );
			}
		}

		return $lines;
	}
}
551
552 /**
553 * @deprecated Alias for WordAccumulator, to be soon removed
554 */
class HWLDFWordAccumulator extends MediaWiki\Diff\WordAccumulator {
	// Intentionally empty: this class adds no members of its own and only
	// makes the parent class reachable under the old HWLDFWordAccumulator name.
}