3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6 * You may copy this code freely under the conditions of the GPL.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
24 * @ingroup DifferenceEngine
25 * @defgroup DifferenceEngine DifferenceEngine
29 * The base class for all other DiffOp classes.
31 * The classes that extend DiffOp are: DiffOpCopy, DiffOpDelete, DiffOpAdd and
32 * DiffOpChange. FakeDiffOp also extends DiffOp, but it is not located in this file.
35 * @ingroup DifferenceEngine
37 abstract class DiffOp
{
57 public function getType() {
64 public function getOrig() {
72 public function getClosing( $i = null ) {
74 return $this->closing
;
76 if ( array_key_exists( $i, $this->closing
) ) {
77 return $this->closing
[$i];
82 abstract public function reverse();
87 public function norig() {
88 return $this->orig ?
count( $this->orig
) : 0;
94 public function nclosing() {
95 return $this->closing ?
count( $this->closing
) : 0;
100 * Extends DiffOp. Used to mark strings that have been
101 * copied from one string array to the other.
104 * @ingroup DifferenceEngine
106 class DiffOpCopy
extends DiffOp
{
107 public $type = 'copy';
109 public function __construct( $orig, $closing = false ) {
110 if ( !is_array( $closing ) ) {
114 $this->closing
= $closing;
120 public function reverse() {
121 return new DiffOpCopy( $this->closing
, $this->orig
);
126 * Extends DiffOp. Used to mark strings that have been
127 * deleted from the first string array.
130 * @ingroup DifferenceEngine
132 class DiffOpDelete
extends DiffOp
{
133 public $type = 'delete';
135 public function __construct( $lines ) {
136 $this->orig
= $lines;
137 $this->closing
= false;
143 public function reverse() {
144 return new DiffOpAdd( $this->orig
);
149 * Extends DiffOp. Used to mark strings that have been
150 * added to the second string array.
153 * @ingroup DifferenceEngine
155 class DiffOpAdd
extends DiffOp
{
156 public $type = 'add';
158 public function __construct( $lines ) {
159 $this->closing
= $lines;
164 * @return DiffOpDelete
166 public function reverse() {
167 return new DiffOpDelete( $this->closing
);
172 * Extends DiffOp. Used to mark strings that have been
173 * changed from the first string array (both added and subtracted).
176 * @ingroup DifferenceEngine
178 class DiffOpChange
extends DiffOp
{
179 public $type = 'change';
181 public function __construct( $orig, $closing ) {
183 $this->closing
= $closing;
187 * @return DiffOpChange
189 public function reverse() {
190 return new DiffOpChange( $this->closing
, $this->orig
);
195 * Class used internally by Diff to actually compute the diffs.
197 * The algorithm used here is mostly lifted from the perl module
198 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
199 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
201 * More ideas are taken from:
202 * http://www.ics.uci.edu/~eppstein/161/960229.html
204 * Some ideas (and a bit of code) are from analyze.c, from GNU
205 * diffutils-2.7, which can be found at:
206 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
208 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
211 * Line length limits for robustness added by Tim Starling, 2005-08-31
212 * Alternative implementation added by Guy Van den Broeck, 2008-07-30
214 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
216 * @ingroup DifferenceEngine
219 const MAX_XREF_LENGTH
= 10000;
221 protected $xchanged, $ychanged;
223 protected $xv = [], $yv = [];
224 protected $xind = [], $yind = [];
226 protected $seq = [], $in_seq = [];
231 * @param string[] $from_lines
232 * @param string[] $to_lines
236 public function diff( $from_lines, $to_lines ) {
238 // Diff and store locally
239 $this->diffLocal( $from_lines, $to_lines );
241 // Merge edits when possible
242 $this->shiftBoundaries( $from_lines, $this->xchanged
, $this->ychanged
);
243 $this->shiftBoundaries( $to_lines, $this->ychanged
, $this->xchanged
);
245 // Compute the edit operations.
246 $n_from = count( $from_lines );
247 $n_to = count( $to_lines );
251 while ( $xi < $n_from ||
$yi < $n_to ) {
252 assert( $yi < $n_to ||
$this->xchanged
[$xi] );
253 assert( $xi < $n_from ||
$this->ychanged
[$yi] );
255 // Skip matching "snake".
257 while ( $xi < $n_from && $yi < $n_to
258 && !$this->xchanged
[$xi] && !$this->ychanged
[$yi]
260 $copy[] = $from_lines[$xi++
];
264 $edits[] = new DiffOpCopy( $copy );
267 // Find deletes & adds.
269 while ( $xi < $n_from && $this->xchanged
[$xi] ) {
270 $delete[] = $from_lines[$xi++
];
274 while ( $yi < $n_to && $this->ychanged
[$yi] ) {
275 $add[] = $to_lines[$yi++
];
278 if ( $delete && $add ) {
279 $edits[] = new DiffOpChange( $delete, $add );
280 } elseif ( $delete ) {
281 $edits[] = new DiffOpDelete( $delete );
283 $edits[] = new DiffOpAdd( $add );
291 * @param string[] $from_lines
292 * @param string[] $to_lines
294 private function diffLocal( $from_lines, $to_lines ) {
295 $wikidiff3 = new WikiDiff3();
296 $wikidiff3->diff( $from_lines, $to_lines );
297 $this->xchanged
= $wikidiff3->removed
;
298 $this->ychanged
= $wikidiff3->added
;
302 * Adjust inserts/deletes of identical lines to join changes
303 * as much as possible.
305 * We do something when a run of changed lines include a
306 * line at one end and has an excluded, identical line at the other.
307 * We are free to choose which identical line is included.
308 * `compareseq' usually chooses the one at the beginning,
309 * but usually it is cleaner to consider the following identical line
310 * to be the "change".
312 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
314 private function shiftBoundaries( $lines, &$changed, $other_changed ) {
318 assert( count( $lines ) == count( $changed ) );
319 $len = count( $lines );
320 $other_len = count( $other_changed );
324 * Scan forwards to find beginning of another run of changes.
325 * Also keep track of the corresponding point in the other file.
327 * Throughout this code, $i and $j are adjusted together so that
328 * the first $i elements of $changed and the first $j elements
329 * of $other_changed both contain the same number of zeros
331 * Furthermore, $j is always kept so that $j == $other_len or
332 * $other_changed[$j] == false.
334 while ( $j < $other_len && $other_changed[$j] ) {
338 while ( $i < $len && !$changed[$i] ) {
339 assert( $j < $other_len && ! $other_changed[$j] );
342 while ( $j < $other_len && $other_changed[$j] ) {
353 // Find the end of this run of changes.
354 while ( ++
$i < $len && $changed[$i] ) {
360 * Record the length of this run of changes, so that
361 * we can later determine whether the run has grown.
363 $runlength = $i - $start;
366 * Move the changed region back, so long as the
367 * previous unchanged line matches the last changed one.
368 * This merges with previous changed regions.
370 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
371 $changed[--$start] = 1;
372 $changed[--$i] = false;
373 while ( $start > 0 && $changed[$start - 1] ) {
377 while ( $other_changed[--$j] ) {
380 assert( $j >= 0 && !$other_changed[$j] );
384 * Set CORRESPONDING to the end of the changed run, at the last
385 * point where it corresponds to a changed run in the other file.
386 * CORRESPONDING == LEN means no such point has been found.
388 $corresponding = $j < $other_len ?
$i : $len;
391 * Move the changed region forward, so long as the
392 * first changed line matches the following unchanged one.
393 * This merges with following changed regions.
394 * Do this second, so that if there are no merges,
395 * the changed region is moved forward as far as possible.
397 while ( $i < $len && $lines[$start] == $lines[$i] ) {
398 $changed[$start++
] = false;
400 while ( $i < $len && $changed[$i] ) {
404 assert( $j < $other_len && ! $other_changed[$j] );
406 if ( $j < $other_len && $other_changed[$j] ) {
408 while ( $j < $other_len && $other_changed[$j] ) {
413 } while ( $runlength != $i - $start );
416 * If possible, move the fully-merged run of changes
417 * back to a corresponding run in the other file.
419 while ( $corresponding < $i ) {
420 $changed[--$start] = 1;
423 while ( $other_changed[--$j] ) {
426 assert( $j >= 0 && !$other_changed[$j] );
433 * Class representing a 'diff' between two sequences of strings.
436 * @ingroup DifferenceEngine
447 * Computes diff between sequences of strings.
449 * @param string[] $from_lines An array of strings.
450 * Typically these are lines from a file.
451 * @param string[] $to_lines An array of strings.
453 public function __construct( $from_lines, $to_lines ) {
454 $eng = new DiffEngine
;
455 $this->edits
= $eng->diff( $from_lines, $to_lines );
461 public function getEdits() {
466 * Compute reversed Diff.
470 * $diff = new Diff($lines1, $lines2);
471 * $rev = $diff->reverse();
473 * @return Object A Diff object representing the inverse of the
476 public function reverse() {
479 /** @var DiffOp $edit */
480 foreach ( $this->edits
as $edit ) {
481 $rev->edits
[] = $edit->reverse();
488 * Check for empty diff.
490 * @return bool True if two sequences were identical.
492 public function isEmpty() {
493 foreach ( $this->edits
as $edit ) {
494 if ( $edit->type
!= 'copy' ) {
503 * Compute the length of the Longest Common Subsequence (LCS).
505 * This is mostly for diagnostic purposes.
507 * @return int The length of the LCS.
509 public function lcs() {
511 foreach ( $this->edits
as $edit ) {
512 if ( $edit->type
== 'copy' ) {
513 $lcs +
= count( $edit->orig
);
521 * Get the original set of lines.
523 * This reconstructs the $from_lines parameter passed to the
526 * @return string[] The original sequence of strings.
528 public function orig() {
531 foreach ( $this->edits
as $edit ) {
533 array_splice( $lines, count( $lines ), 0, $edit->orig
);
541 * Get the closing set of lines.
543 * This reconstructs the $to_lines parameter passed to the
546 * @return string[] The sequence of strings.
548 public function closing() {
551 foreach ( $this->edits
as $edit ) {
552 if ( $edit->closing
) {
553 array_splice( $lines, count( $lines ), 0, $edit->closing
);
562 * @deprecated Alias for WordAccumulator, to be removed soon
564 class HWLDFWordAccumulator
extends MediaWiki\Diff\WordAccumulator
{