Typo/logic error in move page watchlist update.
[lhc/web/wiklou.git] / includes / DifferenceEngine.php
1 <?php
2 # See diff.doc
3
4 class DifferenceEngine {
5 /* private */ var $mOldid, $mNewid;
6 /* private */ var $mOldtitle, $mNewtitle;
7 /* private */ var $mOldtext, $mNewtext;
8 /* private */ var $mOldUser, $mNewUser;
9 /* private */ var $mOldComment, $mNewComment;
10 /* private */ var $mOldPage, $mNewPage;
11 /* private */ var $mRcidMarkPatrolled;
12
13 function DifferenceEngine( $old, $new, $rcid = 0 )
14 {
15 $this->mOldid = $old;
16 $this->mNewid = $new;
17 $this->mRcidMarkPatrolled = intval($rcid); # force it to be an integer
18 }
19
20 function showDiffPage()
21 {
22 global $wgUser, $wgTitle, $wgOut, $wgLang, $wgOnlySysopsCanPatrol, $wgUseRCPatrol;
23 $fname = "DifferenceEngine::showDiffPage";
24 wfProfileIn( $fname );
25
26 $t = $wgTitle->getPrefixedText() . " (Diff: {$this->mOldid}, " .
27 "{$this->mNewid})";
28 $mtext = wfMsg( "missingarticle", $t );
29
30 $wgOut->setArticleFlag( false );
31 if ( ! $this->loadText() ) {
32 $wgOut->setPagetitle( wfMsg( "errorpagetitle" ) );
33 $wgOut->addHTML( $mtext );
34 wfProfileOut( $fname );
35 return;
36 }
37 $wgOut->suppressQuickbar();
38
39 $oldTitle = $this->mOldPage->getPrefixedText();
40 $newTitle = $this->mNewPage->getPrefixedText();
41 if( $oldTitle == $newTitle ) {
42 $wgOut->setPageTitle( $newTitle );
43 } else {
44 $wgOut->setPageTitle( $oldTitle . ", " . $newTitle );
45 }
46 $wgOut->setSubtitle( wfMsg( "difference" ) );
47 $wgOut->setRobotpolicy( "noindex,follow" );
48
49 if ( !( $this->mOldPage->userCanRead() && $this->mNewPage->userCanRead() ) ) {
50 $wgOut->loginToUse();
51 $wgOut->output();
52 wfProfileOut( $fname );
53 exit;
54 }
55
56 $sk = $wgUser->getSkin();
57 $talk = $wgLang->getNsText( NS_TALK );
58 $contribs = wfMsg( "contribslink" );
59
60 $this->mOldComment = $sk->formatComment($this->mOldComment);
61 $this->mNewComment = $sk->formatComment($this->mNewComment);
62
63 $oldUserLink = $sk->makeLinkObj( Title::makeTitle( NS_USER, $this->mOldUser ), $this->mOldUser );
64 $newUserLink = $sk->makeLinkObj( Title::makeTitle( NS_USER, $this->mNewUser ), $this->mNewUser );
65 $oldUTLink = $sk->makeLinkObj( Title::makeTitle( NS_USER_TALK, $this->mOldUser ), $talk );
66 $newUTLink = $sk->makeLinkObj( Title::makeTitle( NS_USER_TALK, $this->mNewUser ), $talk );
67 $oldContribs = $sk->makeKnownLinkObj( Title::makeTitle( NS_SPECIAL, "Contributions" ), $contribs,
68 "target=" . urlencode($this->mOldUser) );
69 $newContribs = $sk->makeKnownLinkObj( Title::makeTitle( NS_SPECIAL, "Contributions" ), $contribs,
70 "target=" . urlencode($this->mNewUser) );
71 if ( !$this->mNewid && $wgUser->isSysop() ) {
72 $rollback = "&nbsp;&nbsp;&nbsp;<strong>[" . $sk->makeKnownLinkObj( $wgTitle, wfMsg( "rollbacklink" ),
73 "action=rollback&from=" . urlencode($this->mNewUser) ) . "]</strong>";
74 } else {
75 $rollback = "";
76 }
77 if ( $wgUseRCPatrol && $this->mRcidMarkPatrolled != 0 && $wgUser->getID() != 0 &&
78 ( $wgUser->isSysop() || !$wgOnlySysopsCanPatrol ) )
79 {
80 $patrol = " [" . $sk->makeKnownLinkObj( $wgTitle, wfMsg( 'markaspatrolleddiff' ),
81 "action=markpatrolled&rcid={$this->mRcidMarkPatrolled}" ) . "]";
82 } else {
83 $patrol = "";
84 }
85
86 $oldHeader = "<strong>{$this->mOldtitle}</strong><br />$oldUserLink " .
87 "($oldUTLink | $oldContribs)<br />" . $this->mOldComment;
88 $newHeader = "<strong>{$this->mNewtitle}</strong><br />$newUserLink " .
89 "($newUTLink | $newContribs) $rollback<br />" . $this->mNewComment . $patrol;
90
91 DifferenceEngine::showDiff( $this->mOldtext, $this->mNewtext,
92 $oldHeader, $newHeader );
93 $wgOut->addHTML( "<hr /><h2>{$this->mNewtitle}</h2>\n" );
94 $wgOut->addWikiText( $this->mNewtext );
95
96 wfProfileOut( $fname );
97 }
98
99 function showDiff( $otext, $ntext, $otitle, $ntitle )
100 {
101 global $wgOut, $wgUseExternalDiffEngine;
102
103 $otext = str_replace( "\r\n", "\n", htmlspecialchars( $otext ) );
104 $ntext = str_replace( "\r\n", "\n", htmlspecialchars( $ntext ) );
105
106
107 $wgOut->addHTML( "<table border='0' width='98%'
108 cellpadding='0' cellspacing='4px' class='diff'><tr>
109 <td colspan='2' width='50%' align='center' class='diff-otitle'>
110 {$otitle}</td>
111 <td colspan='2' width='50%' align='center' class='diff-ntitle'>
112 {$ntitle}</td>
113 </tr>\n" );
114 if ( $wgUseExternalDiffEngine ) {
115 dl("php_wikidiff.so");
116 $wgOut->addHTML( wikidiff_do_diff( $otext, $ntext, 2) );
117 } else {
118 $ota = explode( "\n", $otext);
119 $nta = explode( "\n", $ntext);
120 $diffs = new Diff( $ota, $nta );
121 $formatter = new TableDiffFormatter();
122 $formatter->format( $diffs );
123 }
124 $wgOut->addHTML( "</table>\n" );
125 }
126
127 # Load the text of the articles to compare. If newid is 0, then compare
128 # the old article in oldid to the current article; if oldid is 0, then
129 # compare the current article to the immediately previous one (ignoring
130 # the value of newid).
131 #
132 function loadText()
133 {
134 global $wgTitle, $wgOut, $wgLang;
135 $fname = "DifferenceEngine::loadText";
136
137 $dbr =& wfGetDB( DB_SLAVE );
138 if ( 0 == $this->mNewid || 0 == $this->mOldid ) {
139 $wgOut->setArticleFlag( true );
140 $this->mNewtitle = wfMsg( "currentrev" );
141 $id = $wgTitle->getArticleID();
142
143 $s = $dbr->getArray( 'cur', array( 'cur_text', 'cur_user_text', 'cur_comment' ),
144 array( 'cur_id' => $id ), $fname );
145 if ( $s === false ) {
146 return false;
147 }
148
149 $this->mNewPage = &$wgTitle;
150 $this->mNewtext = $s->cur_text;
151 $this->mNewUser = $s->cur_user_text;
152 $this->mNewComment = $s->cur_comment;
153 } else {
154 $s = $dbr->getArray( 'old', array( 'old_namespace','old_title','old_timestamp', 'old_text',
155 'old_flags','old_user_text','old_comment' ), array( 'old_id' => $this->mNewid ), $fname );
156
157 if ( $s === false ) {
158 return false;
159 }
160
161 $this->mNewtext = Article::getRevisionText( $s );
162
163 $t = $wgLang->timeanddate( $s->old_timestamp, true );
164 $this->mNewPage = Title::MakeTitle( $s->old_namespace, $s->old_title );
165 $this->mNewtitle = wfMsg( "revisionasof", $t );
166 $this->mNewUser = $s->old_user_text;
167 $this->mNewComment = $s->old_comment;
168 }
169 if ( 0 == $this->mOldid ) {
170 $s = $dbr->getArray( 'old',
171 array( 'old_namespace','old_title','old_timestamp','old_text', 'old_flags','old_user_text','old_comment' ),
172 array( /* WHERE */
173 'old_namespace' => $this->mNewPage->getNamespace(),
174 'old_title' => $this->mNewPage->getDBkey()
175 ), $fname, array( 'ORDER BY' => 'inverse_timestamp', 'USE INDEX' => 'name_title_timestamp' )
176 );
177 } else {
178 $s = $dbr->getArray( 'old',
179 array( 'old_namespace','old_title','old_timestamp','old_text','old_flags','old_user_text','old_comment'),
180 array( 'old_id' => $this->mOldid ),
181 $fname
182 );
183 }
184 if ( $s === false ) {
185 return false;
186 }
187 $this->mOldPage = Title::MakeTitle( $s->old_namespace, $s->old_title );
188 $this->mOldtext = Article::getRevisionText( $s );
189
190 $t = $wgLang->timeanddate( $s->old_timestamp, true );
191 $this->mOldtitle = wfMsg( "revisionasof", $t );
192 $this->mOldUser = $s->old_user_text;
193 $this->mOldComment = $s->old_comment;
194
195 return true;
196 }
197 }
198
199 // A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
200 //
201 // Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
202 // You may copy this code freely under the conditions of the GPL.
203 //
204
205 define('USE_ASSERTS', function_exists('assert'));
206
207 class _DiffOp {
208 var $type;
209 var $orig;
210 var $closing;
211
212 function reverse() {
213 trigger_error("pure virtual", E_USER_ERROR);
214 }
215
216 function norig() {
217 return $this->orig ? sizeof($this->orig) : 0;
218 }
219
220 function nclosing() {
221 return $this->closing ? sizeof($this->closing) : 0;
222 }
223 }
224
225 class _DiffOp_Copy extends _DiffOp {
226 var $type = 'copy';
227
228 function _DiffOp_Copy ($orig, $closing = false) {
229 if (!is_array($closing))
230 $closing = $orig;
231 $this->orig = $orig;
232 $this->closing = $closing;
233 }
234
235 function reverse() {
236 return new _DiffOp_Copy($this->closing, $this->orig);
237 }
238 }
239
240 class _DiffOp_Delete extends _DiffOp {
241 var $type = 'delete';
242
243 function _DiffOp_Delete ($lines) {
244 $this->orig = $lines;
245 $this->closing = false;
246 }
247
248 function reverse() {
249 return new _DiffOp_Add($this->orig);
250 }
251 }
252
253 class _DiffOp_Add extends _DiffOp {
254 var $type = 'add';
255
256 function _DiffOp_Add ($lines) {
257 $this->closing = $lines;
258 $this->orig = false;
259 }
260
261 function reverse() {
262 return new _DiffOp_Delete($this->closing);
263 }
264 }
265
266 class _DiffOp_Change extends _DiffOp {
267 var $type = 'change';
268
269 function _DiffOp_Change ($orig, $closing) {
270 $this->orig = $orig;
271 $this->closing = $closing;
272 }
273
274 function reverse() {
275 return new _DiffOp_Change($this->closing, $this->orig);
276 }
277 }
278
279
280 /**
281 * Class used internally by Diff to actually compute the diffs.
282 *
283 * The algorithm used here is mostly lifted from the perl module
284 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
285 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
286 *
287 * More ideas are taken from:
288 * http://www.ics.uci.edu/~eppstein/161/960229.html
289 *
290 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
291 * diffutils-2.7, which can be found at:
292 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
293 *
294 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
295 * are my own.
296 *
297 * @author Geoffrey T. Dairiki
298 * @access private
299 */
300 class _DiffEngine
301 {
302 function diff ($from_lines, $to_lines) {
303 $n_from = sizeof($from_lines);
304 $n_to = sizeof($to_lines);
305
306 $this->xchanged = $this->ychanged = array();
307 $this->xv = $this->yv = array();
308 $this->xind = $this->yind = array();
309 unset($this->seq);
310 unset($this->in_seq);
311 unset($this->lcs);
312
313 // Skip leading common lines.
314 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
315 if ($from_lines[$skip] != $to_lines[$skip])
316 break;
317 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
318 }
319 // Skip trailing common lines.
320 $xi = $n_from; $yi = $n_to;
321 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
322 if ($from_lines[$xi] != $to_lines[$yi])
323 break;
324 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
325 }
326
327 // Ignore lines which do not exist in both files.
328 for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
329 $xhash[$from_lines[$xi]] = 1;
330 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
331 $line = $to_lines[$yi];
332 if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
333 continue;
334 $yhash[$line] = 1;
335 $this->yv[] = $line;
336 $this->yind[] = $yi;
337 }
338 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
339 $line = $from_lines[$xi];
340 if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
341 continue;
342 $this->xv[] = $line;
343 $this->xind[] = $xi;
344 }
345
346 // Find the LCS.
347 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
348
349 // Merge edits when possible
350 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
351 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
352
353 // Compute the edit operations.
354 $edits = array();
355 $xi = $yi = 0;
356 while ($xi < $n_from || $yi < $n_to) {
357 USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
358 USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
359
360 // Skip matching "snake".
361 $copy = array();
362 while ( $xi < $n_from && $yi < $n_to
363 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
364 $copy[] = $from_lines[$xi++];
365 ++$yi;
366 }
367 if ($copy)
368 $edits[] = new _DiffOp_Copy($copy);
369
370 // Find deletes & adds.
371 $delete = array();
372 while ($xi < $n_from && $this->xchanged[$xi])
373 $delete[] = $from_lines[$xi++];
374
375 $add = array();
376 while ($yi < $n_to && $this->ychanged[$yi])
377 $add[] = $to_lines[$yi++];
378
379 if ($delete && $add)
380 $edits[] = new _DiffOp_Change($delete, $add);
381 elseif ($delete)
382 $edits[] = new _DiffOp_Delete($delete);
383 elseif ($add)
384 $edits[] = new _DiffOp_Add($add);
385 }
386 return $edits;
387 }
388
389
390 /* Divide the Largest Common Subsequence (LCS) of the sequences
391 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
392 * sized segments.
393 *
394 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
395 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
396 * sub sequences. The first sub-sequence is contained in [X0, X1),
397 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
398 * that (X0, Y0) == (XOFF, YOFF) and
399 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
400 *
401 * This function assumes that the first lines of the specified portions
402 * of the two files do not match, and likewise that the last lines do not
403 * match. The caller must trim matching lines from the beginning and end
404 * of the portions it is going to specify.
405 */
406 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
407 $flip = false;
408
409 if ($xlim - $xoff > $ylim - $yoff) {
410 // Things seems faster (I'm not sure I understand why)
411 // when the shortest sequence in X.
412 $flip = true;
413 list ($xoff, $xlim, $yoff, $ylim)
414 = array( $yoff, $ylim, $xoff, $xlim);
415 }
416
417 if ($flip)
418 for ($i = $ylim - 1; $i >= $yoff; $i--)
419 $ymatches[$this->xv[$i]][] = $i;
420 else
421 for ($i = $ylim - 1; $i >= $yoff; $i--)
422 $ymatches[$this->yv[$i]][] = $i;
423
424 $this->lcs = 0;
425 $this->seq[0]= $yoff - 1;
426 $this->in_seq = array();
427 $ymids[0] = array();
428
429 $numer = $xlim - $xoff + $nchunks - 1;
430 $x = $xoff;
431 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
432 if ($chunk > 0)
433 for ($i = 0; $i <= $this->lcs; $i++)
434 $ymids[$i][$chunk-1] = $this->seq[$i];
435
436 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
437 for ( ; $x < $x1; $x++) {
438 $line = $flip ? $this->yv[$x] : $this->xv[$x];
439 if (empty($ymatches[$line]))
440 continue;
441 $matches = $ymatches[$line];
442 reset($matches);
443 while (list ($junk, $y) = each($matches))
444 if (empty($this->in_seq[$y])) {
445 $k = $this->_lcs_pos($y);
446 USE_ASSERTS && assert($k > 0);
447 $ymids[$k] = $ymids[$k-1];
448 break;
449 }
450 while (list ($junk, $y) = each($matches)) {
451 if ($y > $this->seq[$k-1]) {
452 USE_ASSERTS && assert($y < $this->seq[$k]);
453 // Optimization: this is a common case:
454 // next match is just replacing previous match.
455 $this->in_seq[$this->seq[$k]] = false;
456 $this->seq[$k] = $y;
457 $this->in_seq[$y] = 1;
458 }
459 else if (empty($this->in_seq[$y])) {
460 $k = $this->_lcs_pos($y);
461 USE_ASSERTS && assert($k > 0);
462 $ymids[$k] = $ymids[$k-1];
463 }
464 }
465 }
466 }
467
468 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
469 $ymid = $ymids[$this->lcs];
470 for ($n = 0; $n < $nchunks - 1; $n++) {
471 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
472 $y1 = $ymid[$n] + 1;
473 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
474 }
475 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
476
477 return array($this->lcs, $seps);
478 }
479
480 function _lcs_pos ($ypos) {
481 $end = $this->lcs;
482 if ($end == 0 || $ypos > $this->seq[$end]) {
483 $this->seq[++$this->lcs] = $ypos;
484 $this->in_seq[$ypos] = 1;
485 return $this->lcs;
486 }
487
488 $beg = 1;
489 while ($beg < $end) {
490 $mid = (int)(($beg + $end) / 2);
491 if ( $ypos > $this->seq[$mid] )
492 $beg = $mid + 1;
493 else
494 $end = $mid;
495 }
496
497 USE_ASSERTS && assert($ypos != $this->seq[$end]);
498
499 $this->in_seq[$this->seq[$end]] = false;
500 $this->seq[$end] = $ypos;
501 $this->in_seq[$ypos] = 1;
502 return $end;
503 }
504
505 /* Find LCS of two sequences.
506 *
507 * The results are recorded in the vectors $this->{x,y}changed[], by
508 * storing a 1 in the element for each line that is an insertion
509 * or deletion (ie. is not in the LCS).
510 *
511 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
512 *
513 * Note that XLIM, YLIM are exclusive bounds.
514 * All line numbers are origin-0 and discarded lines are not counted.
515 */
516 function _compareseq ($xoff, $xlim, $yoff, $ylim) {
517 // Slide down the bottom initial diagonal.
518 while ($xoff < $xlim && $yoff < $ylim
519 && $this->xv[$xoff] == $this->yv[$yoff]) {
520 ++$xoff;
521 ++$yoff;
522 }
523
524 // Slide up the top initial diagonal.
525 while ($xlim > $xoff && $ylim > $yoff
526 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
527 --$xlim;
528 --$ylim;
529 }
530
531 if ($xoff == $xlim || $yoff == $ylim)
532 $lcs = 0;
533 else {
534 // This is ad hoc but seems to work well.
535 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
536 //$nchunks = max(2,min(8,(int)$nchunks));
537 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
538 list ($lcs, $seps)
539 = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
540 }
541
542 if ($lcs == 0) {
543 // X and Y sequences have no common subsequence:
544 // mark all changed.
545 while ($yoff < $ylim)
546 $this->ychanged[$this->yind[$yoff++]] = 1;
547 while ($xoff < $xlim)
548 $this->xchanged[$this->xind[$xoff++]] = 1;
549 }
550 else {
551 // Use the partitions to split this problem into subproblems.
552 reset($seps);
553 $pt1 = $seps[0];
554 while ($pt2 = next($seps)) {
555 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
556 $pt1 = $pt2;
557 }
558 }
559 }
560
561 /* Adjust inserts/deletes of identical lines to join changes
562 * as much as possible.
563 *
564 * We do something when a run of changed lines include a
565 * line at one end and has an excluded, identical line at the other.
566 * We are free to choose which identical line is included.
567 * `compareseq' usually chooses the one at the beginning,
568 * but usually it is cleaner to consider the following identical line
569 * to be the "change".
570 *
571 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
572 */
573 function _shift_boundaries ($lines, &$changed, $other_changed) {
574 $i = 0;
575 $j = 0;
576
577 USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
578 $len = sizeof($lines);
579 $other_len = sizeof($other_changed);
580
581 while (1) {
582 /*
583 * Scan forwards to find beginning of another run of changes.
584 * Also keep track of the corresponding point in the other file.
585 *
586 * Throughout this code, $i and $j are adjusted together so that
587 * the first $i elements of $changed and the first $j elements
588 * of $other_changed both contain the same number of zeros
589 * (unchanged lines).
590 * Furthermore, $j is always kept so that $j == $other_len or
591 * $other_changed[$j] == false.
592 */
593 while ($j < $other_len && $other_changed[$j])
594 $j++;
595
596 while ($i < $len && ! $changed[$i]) {
597 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
598 $i++; $j++;
599 while ($j < $other_len && $other_changed[$j])
600 $j++;
601 }
602
603 if ($i == $len)
604 break;
605
606 $start = $i;
607
608 // Find the end of this run of changes.
609 while (++$i < $len && $changed[$i])
610 continue;
611
612 do {
613 /*
614 * Record the length of this run of changes, so that
615 * we can later determine whether the run has grown.
616 */
617 $runlength = $i - $start;
618
619 /*
620 * Move the changed region back, so long as the
621 * previous unchanged line matches the last changed one.
622 * This merges with previous changed regions.
623 */
624 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
625 $changed[--$start] = 1;
626 $changed[--$i] = false;
627 while ($start > 0 && $changed[$start - 1])
628 $start--;
629 USE_ASSERTS && assert('$j > 0');
630 while ($other_changed[--$j])
631 continue;
632 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
633 }
634
635 /*
636 * Set CORRESPONDING to the end of the changed run, at the last
637 * point where it corresponds to a changed run in the other file.
638 * CORRESPONDING == LEN means no such point has been found.
639 */
640 $corresponding = $j < $other_len ? $i : $len;
641
642 /*
643 * Move the changed region forward, so long as the
644 * first changed line matches the following unchanged one.
645 * This merges with following changed regions.
646 * Do this second, so that if there are no merges,
647 * the changed region is moved forward as far as possible.
648 */
649 while ($i < $len && $lines[$start] == $lines[$i]) {
650 $changed[$start++] = false;
651 $changed[$i++] = 1;
652 while ($i < $len && $changed[$i])
653 $i++;
654
655 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
656 $j++;
657 if ($j < $other_len && $other_changed[$j]) {
658 $corresponding = $i;
659 while ($j < $other_len && $other_changed[$j])
660 $j++;
661 }
662 }
663 } while ($runlength != $i - $start);
664
665 /*
666 * If possible, move the fully-merged run of changes
667 * back to a corresponding run in the other file.
668 */
669 while ($corresponding < $i) {
670 $changed[--$start] = 1;
671 $changed[--$i] = 0;
672 USE_ASSERTS && assert('$j > 0');
673 while ($other_changed[--$j])
674 continue;
675 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
676 }
677 }
678 }
679 }
680
681 /**
682 * Class representing a 'diff' between two sequences of strings.
683 */
684 class Diff
685 {
686 var $edits;
687
688 /**
689 * Constructor.
690 * Computes diff between sequences of strings.
691 *
692 * @param $from_lines array An array of strings.
693 * (Typically these are lines from a file.)
694 * @param $to_lines array An array of strings.
695 */
696 function Diff($from_lines, $to_lines) {
697 $eng = new _DiffEngine;
698 $this->edits = $eng->diff($from_lines, $to_lines);
699 //$this->_check($from_lines, $to_lines);
700 }
701
702 /**
703 * Compute reversed Diff.
704 *
705 * SYNOPSIS:
706 *
707 * $diff = new Diff($lines1, $lines2);
708 * $rev = $diff->reverse();
709 * @return object A Diff object representing the inverse of the
710 * original diff.
711 */
712 function reverse () {
713 $rev = $this;
714 $rev->edits = array();
715 foreach ($this->edits as $edit) {
716 $rev->edits[] = $edit->reverse();
717 }
718 return $rev;
719 }
720
721 /**
722 * Check for empty diff.
723 *
724 * @return bool True iff two sequences were identical.
725 */
726 function isEmpty () {
727 foreach ($this->edits as $edit) {
728 if ($edit->type != 'copy')
729 return false;
730 }
731 return true;
732 }
733
734 /**
735 * Compute the length of the Longest Common Subsequence (LCS).
736 *
737 * This is mostly for diagnostic purposed.
738 *
739 * @return int The length of the LCS.
740 */
741 function lcs () {
742 $lcs = 0;
743 foreach ($this->edits as $edit) {
744 if ($edit->type == 'copy')
745 $lcs += sizeof($edit->orig);
746 }
747 return $lcs;
748 }
749
750 /**
751 * Get the original set of lines.
752 *
753 * This reconstructs the $from_lines parameter passed to the
754 * constructor.
755 *
756 * @return array The original sequence of strings.
757 */
758 function orig() {
759 $lines = array();
760
761 foreach ($this->edits as $edit) {
762 if ($edit->orig)
763 array_splice($lines, sizeof($lines), 0, $edit->orig);
764 }
765 return $lines;
766 }
767
768 /**
769 * Get the closing set of lines.
770 *
771 * This reconstructs the $to_lines parameter passed to the
772 * constructor.
773 *
774 * @return array The sequence of strings.
775 */
776 function closing() {
777 $lines = array();
778
779 foreach ($this->edits as $edit) {
780 if ($edit->closing)
781 array_splice($lines, sizeof($lines), 0, $edit->closing);
782 }
783 return $lines;
784 }
785
786 /**
787 * Check a Diff for validity.
788 *
789 * This is here only for debugging purposes.
790 */
791 function _check ($from_lines, $to_lines) {
792 if (serialize($from_lines) != serialize($this->orig()))
793 trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
794 if (serialize($to_lines) != serialize($this->closing()))
795 trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
796
797 $rev = $this->reverse();
798 if (serialize($to_lines) != serialize($rev->orig()))
799 trigger_error("Reversed original doesn't match", E_USER_ERROR);
800 if (serialize($from_lines) != serialize($rev->closing()))
801 trigger_error("Reversed closing doesn't match", E_USER_ERROR);
802
803
804 $prevtype = 'none';
805 foreach ($this->edits as $edit) {
806 if ( $prevtype == $edit->type )
807 trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
808 $prevtype = $edit->type;
809 }
810
811 $lcs = $this->lcs();
812 trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
813 }
814 }
815
816 /**
817 * FIXME: bad name.
818 */
819 class MappedDiff
820 extends Diff
821 {
822 /**
823 * Constructor.
824 *
825 * Computes diff between sequences of strings.
826 *
827 * This can be used to compute things like
828 * case-insensitve diffs, or diffs which ignore
829 * changes in white-space.
830 *
831 * @param $from_lines array An array of strings.
832 * (Typically these are lines from a file.)
833 *
834 * @param $to_lines array An array of strings.
835 *
836 * @param $mapped_from_lines array This array should
837 * have the same size number of elements as $from_lines.
838 * The elements in $mapped_from_lines and
839 * $mapped_to_lines are what is actually compared
840 * when computing the diff.
841 *
842 * @param $mapped_to_lines array This array should
843 * have the same number of elements as $to_lines.
844 */
845 function MappedDiff($from_lines, $to_lines,
846 $mapped_from_lines, $mapped_to_lines) {
847
848 assert(sizeof($from_lines) == sizeof($mapped_from_lines));
849 assert(sizeof($to_lines) == sizeof($mapped_to_lines));
850
851 $this->Diff($mapped_from_lines, $mapped_to_lines);
852
853 $xi = $yi = 0;
854 for ($i = 0; $i < sizeof($this->edits); $i++) {
855 $orig = &$this->edits[$i]->orig;
856 if (is_array($orig)) {
857 $orig = array_slice($from_lines, $xi, sizeof($orig));
858 $xi += sizeof($orig);
859 }
860
861 $closing = &$this->edits[$i]->closing;
862 if (is_array($closing)) {
863 $closing = array_slice($to_lines, $yi, sizeof($closing));
864 $yi += sizeof($closing);
865 }
866 }
867 }
868 }
869
870 /**
871 * A class to format Diffs
872 *
873 * This class formats the diff in classic diff format.
874 * It is intended that this class be customized via inheritance,
875 * to obtain fancier outputs.
876 */
877 class DiffFormatter
878 {
879 /**
880 * Number of leading context "lines" to preserve.
881 *
882 * This should be left at zero for this class, but subclasses
883 * may want to set this to other values.
884 */
885 var $leading_context_lines = 0;
886
887 /**
888 * Number of trailing context "lines" to preserve.
889 *
890 * This should be left at zero for this class, but subclasses
891 * may want to set this to other values.
892 */
893 var $trailing_context_lines = 0;
894
895 /**
896 * Format a diff.
897 *
898 * @param $diff object A Diff object.
899 * @return string The formatted output.
900 */
901 function format($diff) {
902
903 $xi = $yi = 1;
904 $block = false;
905 $context = array();
906
907 $nlead = $this->leading_context_lines;
908 $ntrail = $this->trailing_context_lines;
909
910 $this->_start_diff();
911
912 foreach ($diff->edits as $edit) {
913 if ($edit->type == 'copy') {
914 if (is_array($block)) {
915 if (sizeof($edit->orig) <= $nlead + $ntrail) {
916 $block[] = $edit;
917 }
918 else{
919 if ($ntrail) {
920 $context = array_slice($edit->orig, 0, $ntrail);
921 $block[] = new _DiffOp_Copy($context);
922 }
923 $this->_block($x0, $ntrail + $xi - $x0,
924 $y0, $ntrail + $yi - $y0,
925 $block);
926 $block = false;
927 }
928 }
929 $context = $edit->orig;
930 }
931 else {
932 if (! is_array($block)) {
933 $context = array_slice($context, sizeof($context) - $nlead);
934 $x0 = $xi - sizeof($context);
935 $y0 = $yi - sizeof($context);
936 $block = array();
937 if ($context)
938 $block[] = new _DiffOp_Copy($context);
939 }
940 $block[] = $edit;
941 }
942
943 if ($edit->orig)
944 $xi += sizeof($edit->orig);
945 if ($edit->closing)
946 $yi += sizeof($edit->closing);
947 }
948
949 if (is_array($block))
950 $this->_block($x0, $xi - $x0,
951 $y0, $yi - $y0,
952 $block);
953
954 return $this->_end_diff();
955 }
956
957 function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
958 $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
959 foreach ($edits as $edit) {
960 if ($edit->type == 'copy')
961 $this->_context($edit->orig);
962 elseif ($edit->type == 'add')
963 $this->_added($edit->closing);
964 elseif ($edit->type == 'delete')
965 $this->_deleted($edit->orig);
966 elseif ($edit->type == 'change')
967 $this->_changed($edit->orig, $edit->closing);
968 else
969 trigger_error("Unknown edit type", E_USER_ERROR);
970 }
971 $this->_end_block();
972 }
973
974 function _start_diff() {
975 ob_start();
976 }
977
978 function _end_diff() {
979 $val = ob_get_contents();
980 ob_end_clean();
981 return $val;
982 }
983
984 function _block_header($xbeg, $xlen, $ybeg, $ylen) {
985 if ($xlen > 1)
986 $xbeg .= "," . ($xbeg + $xlen - 1);
987 if ($ylen > 1)
988 $ybeg .= "," . ($ybeg + $ylen - 1);
989
990 return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
991 }
992
993 function _start_block($header) {
994 echo $header;
995 }
996
997 function _end_block() {
998 }
999
1000 function _lines($lines, $prefix = ' ') {
1001 foreach ($lines as $line)
1002 echo "$prefix $line\n";
1003 }
1004
1005 function _context($lines) {
1006 $this->_lines($lines);
1007 }
1008
1009 function _added($lines) {
1010 $this->_lines($lines, ">");
1011 }
1012 function _deleted($lines) {
1013 $this->_lines($lines, "<");
1014 }
1015
1016 function _changed($orig, $closing) {
1017 $this->_deleted($orig);
1018 echo "---\n";
1019 $this->_added($closing);
1020 }
1021 }
1022
1023
1024 /**
1025 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
1026 *
1027 */
1028
1029 define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
1030
1031 class _HWLDF_WordAccumulator {
1032 function _HWLDF_WordAccumulator () {
1033 $this->_lines = array();
1034 $this->_line = '';
1035 $this->_group = '';
1036 $this->_tag = '';
1037 }
1038
1039 function _flushGroup ($new_tag) {
1040 if ($this->_group !== '') {
1041 if ($this->_tag == 'mark')
1042 $this->_line .= '<span class="diffchange">'.$this->_group.'</span>';
1043 else
1044 $this->_line .= $this->_group;
1045 }
1046 $this->_group = '';
1047 $this->_tag = $new_tag;
1048 }
1049
1050 function _flushLine ($new_tag) {
1051 $this->_flushGroup($new_tag);
1052 if ($this->_line != '')
1053 $this->_lines[] = $this->_line;
1054 $this->_line = '';
1055 }
1056
1057 function addWords ($words, $tag = '') {
1058 if ($tag != $this->_tag)
1059 $this->_flushGroup($tag);
1060
1061 foreach ($words as $word) {
1062 // new-line should only come as first char of word.
1063 if ($word == '')
1064 continue;
1065 if ($word[0] == "\n") {
1066 $this->_group .= NBSP;
1067 $this->_flushLine($tag);
1068 $word = substr($word, 1);
1069 }
1070 assert(!strstr($word, "\n"));
1071 $this->_group .= $word;
1072 }
1073 }
1074
1075 function getLines() {
1076 $this->_flushLine('~done');
1077 return $this->_lines;
1078 }
1079 }
1080
1081 class WordLevelDiff extends MappedDiff
1082 {
1083 function WordLevelDiff ($orig_lines, $closing_lines) {
1084 list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
1085 list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
1086
1087
1088 $this->MappedDiff($orig_words, $closing_words,
1089 $orig_stripped, $closing_stripped);
1090 }
1091
1092 function _split($lines) {
1093 // FIXME: fix POSIX char class.
1094 # if (!preg_match_all('/ ( [^\S\n]+ | [[:alnum:]]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1095 if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1096 implode("\n", $lines),
1097 $m)) {
1098 return array(array(''), array(''));
1099 }
1100 return array($m[0], $m[1]);
1101 }
1102
1103 function orig () {
1104 $orig = new _HWLDF_WordAccumulator;
1105
1106 foreach ($this->edits as $edit) {
1107 if ($edit->type == 'copy')
1108 $orig->addWords($edit->orig);
1109 elseif ($edit->orig)
1110 $orig->addWords($edit->orig, 'mark');
1111 }
1112 return $orig->getLines();
1113 }
1114
1115 function closing () {
1116 $closing = new _HWLDF_WordAccumulator;
1117
1118 foreach ($this->edits as $edit) {
1119 if ($edit->type == 'copy')
1120 $closing->addWords($edit->closing);
1121 elseif ($edit->closing)
1122 $closing->addWords($edit->closing, 'mark');
1123 }
1124 return $closing->getLines();
1125 }
1126 }
1127
1128 /**
1129 * Wikipedia Table style diff formatter.
1130 *
1131 */
1132 class TableDiffFormatter extends DiffFormatter
1133 {
1134 function TableDiffFormatter() {
1135 $this->leading_context_lines = 2;
1136 $this->trailing_context_lines = 2;
1137 }
1138
1139 function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
1140 $l1 = wfMsg( "lineno", $xbeg );
1141 $l2 = wfMsg( "lineno", $ybeg );
1142
1143 $r = '<tr><td colspan="2" align="left"><strong>'.$l1."</strong></td>\n" .
1144 '<td colspan="2" align="left"><strong>'.$l2."</strong></td></tr>\n";
1145 return $r;
1146 }
1147
1148 function _start_block( $header ) {
1149 global $wgOut;
1150 $wgOut->addHTML( $header );
1151 }
1152
1153 function _end_block() {
1154 }
1155
1156 function _lines( $lines, $prefix=' ', $color="white" ) {
1157 }
1158
1159 function addedLine( $line ) {
1160 return '<td>+</td><td class="diff-addedline">' .
1161 $line.'</td>';
1162 }
1163
1164 function deletedLine( $line ) {
1165 return '<td>-</td><td class="diff-deletedline">' .
1166 $line.'</td>';
1167 }
1168
1169 function emptyLine() {
1170 return '<td colspan="2">&nbsp;</td>';
1171 }
1172
1173 function contextLine( $line ) {
1174 return '<td> </td><td class="diff-context">'.$line.'</td>';
1175 }
1176
1177 function _added($lines) {
1178 global $wgOut;
1179 foreach ($lines as $line) {
1180 $wgOut->addHTML( '<tr>' . $this->emptyLine() .
1181 $this->addedLine( $line ) . "</tr>\n" );
1182 }
1183 }
1184
1185 function _deleted($lines) {
1186 global $wgOut;
1187 foreach ($lines as $line) {
1188 $wgOut->addHTML( '<tr>' . $this->deletedLine( $line ) .
1189 $this->emptyLine() . "</tr>\n" );
1190 }
1191 }
1192
1193 function _context( $lines ) {
1194 global $wgOut;
1195 foreach ($lines as $line) {
1196 $wgOut->addHTML( '<tr>' . $this->contextLine( $line ) .
1197 $this->contextLine( $line ) . "</tr>\n" );
1198 }
1199 }
1200
1201 function _changed( $orig, $closing ) {
1202 global $wgOut;
1203 $diff = new WordLevelDiff( $orig, $closing );
1204 $del = $diff->orig();
1205 $add = $diff->closing();
1206
1207 while ( $line = array_shift( $del ) ) {
1208 $aline = array_shift( $add );
1209 $wgOut->addHTML( '<tr>' . $this->deletedLine( $line ) .
1210 $this->addedLine( $aline ) . "</tr>\n" );
1211 }
1212 $this->_added( $add ); # If any leftovers
1213 }
1214 }
1215
1216 ?>