21d0054b85539501b6ebc48d8fa7a1020256bd2d
[lhc/web/wiklou.git] / includes / DifferenceEngine.php
1 <?php
2 /**
3 * See diff.doc
4 * @package MediaWiki
5 * @subpackage DifferenceEngine
6 */
7
8 /**
9 * @todo document
10 * @public
11 * @package MediaWiki
12 * @subpackage DifferenceEngine
13 */
14 class DifferenceEngine {
15 /**#@+
16 * @private
17 */
18 var $mOldid, $mNewid, $mTitle;
19 var $mOldtitle, $mNewtitle, $mPagetitle;
20 var $mOldtext, $mNewtext;
21 var $mOldPage, $mNewPage;
22 var $mRcidMarkPatrolled;
23 var $mOldRev, $mNewRev;
24 var $mRevisionsLoaded = false; // Have the revisions been loaded
25 var $mTextLoaded = 0; // How many text blobs have been loaded, 0, 1 or 2?
26 /**#@-*/
27
28 /**
29 * Constructor
30 * @param $titleObj Title object that the diff is associated with
31 * @param $old Integer: old ID we want to show and diff with.
32 * @param $new String: either 'prev' or 'next'.
33 * @param $rcid Integer: ??? FIXME (default 0)
34 */
35 function DifferenceEngine( $titleObj = null, $old = 0, $new = 0, $rcid = 0 ) {
36 $this->mTitle = $titleObj;
37 wfDebug("DifferenceEngine old '$old' new '$new' rcid '$rcid'\n");
38
39 if ( 'prev' === $new ) {
40 # Show diff between revision $old and the previous one.
41 # Get previous one from DB.
42 #
43 $this->mNewid = intval($old);
44
45 $this->mOldid = $this->mTitle->getPreviousRevisionID( $this->mNewid );
46
47 } elseif ( 'next' === $new ) {
48 # Show diff between revision $old and the previous one.
49 # Get previous one from DB.
50 #
51 $this->mOldid = intval($old);
52 $this->mNewid = $this->mTitle->getNextRevisionID( $this->mOldid );
53 if ( false === $this->mNewid ) {
54 # if no result, NewId points to the newest old revision. The only newer
55 # revision is cur, which is "0".
56 $this->mNewid = 0;
57 }
58
59 } else {
60 $this->mOldid = intval($old);
61 $this->mNewid = intval($new);
62 }
63 $this->mRcidMarkPatrolled = intval($rcid); # force it to be an integer
64 }
65
66 function showDiffPage( $diffOnly = false ) {
67 global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol;
68 $fname = 'DifferenceEngine::showDiffPage';
69 wfProfileIn( $fname );
70
71 # If external diffs are enabled both globally and for the user,
72 # we'll use the application/x-external-editor interface to call
73 # an external diff tool like kompare, kdiff3, etc.
74 if($wgUseExternalEditor && $wgUser->getOption('externaldiff')) {
75 global $wgInputEncoding,$wgServer,$wgScript,$wgLang;
76 $wgOut->disable();
77 header ( "Content-type: application/x-external-editor; charset=".$wgInputEncoding );
78 $url1=$this->mTitle->getFullURL("action=raw&oldid=".$this->mOldid);
79 $url2=$this->mTitle->getFullURL("action=raw&oldid=".$this->mNewid);
80 $special=$wgLang->getNsText(NS_SPECIAL);
81 $control=<<<CONTROL
82 [Process]
83 Type=Diff text
84 Engine=MediaWiki
85 Script={$wgServer}{$wgScript}
86 Special namespace={$special}
87
88 [File]
89 Extension=wiki
90 URL=$url1
91
92 [File 2]
93 Extension=wiki
94 URL=$url2
95 CONTROL;
96 echo($control);
97 return;
98 }
99
100 $wgOut->setArticleFlag( false );
101 if ( ! $this->loadRevisionData() ) {
102 $t = $this->mTitle->getPrefixedText() . " (Diff: {$this->mOldid}, {$this->mNewid})";
103 $mtext = wfMsg( 'missingarticle', "<nowiki>$t</nowiki>" );
104 $wgOut->setPagetitle( wfMsg( 'errorpagetitle' ) );
105 $wgOut->addWikitext( $mtext );
106 wfProfileOut( $fname );
107 return;
108 }
109
110 wfRunHooks( 'DiffViewHeader', array( $this, $this->mOldRev, $this->mNewRev ) );
111
112 if ( $this->mNewRev->isCurrent() ) {
113 $wgOut->setArticleFlag( true );
114 }
115
116 # mOldid is false if the difference engine is called with a "vague" query for
117 # a diff between a version V and its previous version V' AND the version V
118 # is the first version of that article. In that case, V' does not exist.
119 if ( $this->mOldid === false ) {
120 $this->showFirstRevision();
121 $this->renderNewRevision(); // should we respect $diffOnly here or not?
122 wfProfileOut( $fname );
123 return;
124 }
125
126 $wgOut->suppressQuickbar();
127
128 $oldTitle = $this->mOldPage->getPrefixedText();
129 $newTitle = $this->mNewPage->getPrefixedText();
130 if( $oldTitle == $newTitle ) {
131 $wgOut->setPageTitle( $newTitle );
132 } else {
133 $wgOut->setPageTitle( $oldTitle . ', ' . $newTitle );
134 }
135 $wgOut->setSubtitle( wfMsg( 'difference' ) );
136 $wgOut->setRobotpolicy( 'noindex,nofollow' );
137
138 if ( !( $this->mOldPage->userCanRead() && $this->mNewPage->userCanRead() ) ) {
139 $wgOut->loginToUse();
140 $wgOut->output();
141 wfProfileOut( $fname );
142 exit;
143 }
144
145 $sk = $wgUser->getSkin();
146
147 if ( $this->mNewRev->isCurrent() && $wgUser->isAllowed('rollback') ) {
148 $rollback = '&nbsp;&nbsp;&nbsp;' . $sk->generateRollback( $this->mNewRev );
149 } else {
150 $rollback = '';
151 }
152 if( $wgUseRCPatrol && $this->mRcidMarkPatrolled != 0 && $wgUser->isAllowed( 'patrol' ) ) {
153 $patrol = ' [' . $sk->makeKnownLinkObj( $this->mTitle, wfMsg( 'markaspatrolleddiff' ), "action=markpatrolled&rcid={$this->mRcidMarkPatrolled}" ) . ']';
154 } else {
155 $patrol = '';
156 }
157
158 $prevlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'previousdiff' ),
159 'diff=prev&oldid='.$this->mOldid, '', '', 'id="differences-prevlink"' );
160 if ( $this->mNewRev->isCurrent() ) {
161 $nextlink = '&nbsp;';
162 } else {
163 $nextlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'nextdiff' ),
164 'diff=next&oldid='.$this->mNewid, '', '', 'id="differences-nextlink"' );
165 }
166
167 $oldminor = '';
168 $newminor = '';
169
170 if ($this->mOldRev->mMinorEdit == 1) {
171 $oldminor = wfElement( 'span', array( 'class' => 'minor' ),
172 wfMsg( 'minoreditletter') ) . ' ';
173 }
174
175 if ($this->mNewRev->mMinorEdit == 1) {
176 $newminor = wfElement( 'span', array( 'class' => 'minor' ),
177 wfMsg( 'minoreditletter') ) . ' ';
178 }
179
180 $oldHeader = "<strong>{$this->mOldtitle}</strong><br />" .
181 $sk->revUserTools( $this->mOldRev ) . "<br />" .
182 $oldminor . $sk->revComment( $this->mOldRev, !$diffOnly ) . "<br />" .
183 $prevlink;
184 $newHeader = "<strong>{$this->mNewtitle}</strong><br />" .
185 $sk->revUserTools( $this->mNewRev ) . " $rollback<br />" .
186 $newminor . $sk->revComment( $this->mNewRev, !$diffOnly ) . "<br />" .
187 $nextlink . $patrol;
188
189 $this->showDiff( $oldHeader, $newHeader );
190
191 if ( !$diffOnly )
192 $this->renderNewRevision();
193
194 wfProfileOut( $fname );
195 }
196
197 /**
198 * Show the new revision of the page.
199 */
200 function renderNewRevision() {
201 global $wgOut;
202 $fname = 'DifferenceEngine::renderNewRevision';
203 wfProfileIn( $fname );
204
205 $wgOut->addHTML( "<hr /><h2>{$this->mPagetitle}</h2>\n" );
206
207 if( !$this->mNewRev->isCurrent() ) {
208 $oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false );
209 }
210
211 $this->loadNewText();
212 if( is_object( $this->mNewRev ) ) {
213 $wgOut->setRevisionId( $this->mNewRev->getId() );
214 }
215
216 $wgOut->addWikiTextTidy( $this->mNewtext );
217
218 if( !$this->mNewRev->isCurrent() ) {
219 $wgOut->parserOptions()->setEditSection( $oldEditSectionSetting );
220 }
221
222 wfProfileOut( $fname );
223 }
224
225 /**
226 * Show the first revision of an article. Uses normal diff headers in
227 * contrast to normal "old revision" display style.
228 */
229 function showFirstRevision() {
230 global $wgOut, $wgUser;
231
232 $fname = 'DifferenceEngine::showFirstRevision';
233 wfProfileIn( $fname );
234
235 # Get article text from the DB
236 #
237 if ( ! $this->loadNewText() ) {
238 $t = $this->mTitle->getPrefixedText() . " (Diff: {$this->mOldid}, " .
239 "{$this->mNewid})";
240 $mtext = wfMsg( 'missingarticle', "<nowiki>$t</nowiki>" );
241 $wgOut->setPagetitle( wfMsg( 'errorpagetitle' ) );
242 $wgOut->addWikitext( $mtext );
243 wfProfileOut( $fname );
244 return;
245 }
246 if ( $this->mNewRev->isCurrent() ) {
247 $wgOut->setArticleFlag( true );
248 }
249
250 # Check if user is allowed to look at this page. If not, bail out.
251 #
252 if ( !( $this->mTitle->userCanRead() ) ) {
253 $wgOut->loginToUse();
254 $wgOut->output();
255 wfProfileOut( $fname );
256 exit;
257 }
258
259 # Prepare the header box
260 #
261 $sk = $wgUser->getSkin();
262
263 $nextlink = $sk->makeKnownLinkObj( $this->mTitle, wfMsgHtml( 'nextdiff' ), 'diff=next&oldid='.$this->mNewid, '', '', 'id="differences-nextlink"' );
264 $header = "<div class=\"firstrevisionheader\" style=\"text-align: center\"><strong>{$this->mOldtitle}</strong><br />" .
265 $sk->revUserTools( $this->mNewRev ) . "<br />" .
266 $sk->revComment( $this->mNewRev ) . "<br />" .
267 $nextlink . "</div>\n";
268
269 $wgOut->addHTML( $header );
270
271 $wgOut->setSubtitle( wfMsg( 'difference' ) );
272 $wgOut->setRobotpolicy( 'noindex,nofollow' );
273
274 wfProfileOut( $fname );
275 }
276
277 /**
278 * Get the diff text, send it to $wgOut
279 * Returns false if the diff could not be generated, otherwise returns true
280 */
281 function showDiff( $otitle, $ntitle ) {
282 global $wgOut;
283 $diff = $this->getDiff( $otitle, $ntitle );
284 if ( $diff === false ) {
285 $wgOut->addWikitext( wfMsg( 'missingarticle', "<nowiki>(fixme, bug)</nowiki>" ) );
286 return false;
287 } else {
288 $wgOut->addHTML( $diff );
289 return true;
290 }
291 }
292
293 /**
294 * Get diff table, including header
295 * Note that the interface has changed, it's no longer static.
296 * Returns false on error
297 */
298 function getDiff( $otitle, $ntitle ) {
299 $body = $this->getDiffBody();
300 if ( $body === false ) {
301 return false;
302 } else {
303 $multi = $this->getMultiNotice();
304 return $this->addHeader( $body, $otitle, $ntitle, $multi );
305 }
306 }
307
308 /**
309 * Get the diff table body, without header
310 * Results are cached
311 * Returns false on error
312 */
313 function getDiffBody() {
314 global $wgMemc;
315 $fname = 'DifferenceEngine::getDiffBody';
316 wfProfileIn( $fname );
317
318 // Cacheable?
319 $key = false;
320 if ( $this->mOldid && $this->mNewid ) {
321 // Try cache
322 $key = wfMemcKey( 'diff', 'oldid', $this->mOldid, 'newid', $this->mNewid );
323 $difftext = $wgMemc->get( $key );
324 if ( $difftext ) {
325 wfIncrStats( 'diff_cache_hit' );
326 $difftext = $this->localiseLineNumbers( $difftext );
327 $difftext .= "\n<!-- diff cache key $key -->\n";
328 wfProfileOut( $fname );
329 return $difftext;
330 }
331 }
332
333 if ( !$this->loadText() ) {
334 wfProfileOut( $fname );
335 return false;
336 }
337
338 $difftext = $this->generateDiffBody( $this->mOldtext, $this->mNewtext );
339
340 // Save to cache for 7 days
341 if ( $key !== false && $difftext !== false ) {
342 wfIncrStats( 'diff_cache_miss' );
343 $wgMemc->set( $key, $difftext, 7*86400 );
344 } else {
345 wfIncrStats( 'diff_uncacheable' );
346 }
347 // Replace line numbers with the text in the user's language
348 if ( $difftext !== false ) {
349 $difftext = $this->localiseLineNumbers( $difftext );
350 }
351 wfProfileOut( $fname );
352 return $difftext;
353 }
354
355 /**
356 * Generate a diff, no caching
357 * $otext and $ntext must be already segmented
358 */
359 function generateDiffBody( $otext, $ntext ) {
360 global $wgExternalDiffEngine, $wgContLang;
361 $fname = 'DifferenceEngine::generateDiffBody';
362
363 $otext = str_replace( "\r\n", "\n", $otext );
364 $ntext = str_replace( "\r\n", "\n", $ntext );
365
366 if ( $wgExternalDiffEngine == 'wikidiff' ) {
367 # For historical reasons, external diff engine expects
368 # input text to be HTML-escaped already
369 $otext = htmlspecialchars ( $wgContLang->segmentForDiff( $otext ) );
370 $ntext = htmlspecialchars ( $wgContLang->segmentForDiff( $ntext ) );
371 if( !function_exists( 'wikidiff_do_diff' ) ) {
372 dl('php_wikidiff.so');
373 }
374 return $wgContLang->unsegementForDiff( wikidiff_do_diff( $otext, $ntext, 2 ) );
375 }
376
377 if ( $wgExternalDiffEngine == 'wikidiff2' ) {
378 # Better external diff engine, the 2 may some day be dropped
379 # This one does the escaping and segmenting itself
380 if ( !function_exists( 'wikidiff2_do_diff' ) ) {
381 wfProfileIn( "$fname-dl" );
382 @dl('php_wikidiff2.so');
383 wfProfileOut( "$fname-dl" );
384 }
385 if ( function_exists( 'wikidiff2_do_diff' ) ) {
386 wfProfileIn( 'wikidiff2_do_diff' );
387 $text = wikidiff2_do_diff( $otext, $ntext, 2 );
388 wfProfileOut( 'wikidiff2_do_diff' );
389 return $text;
390 }
391 }
392 if ( $wgExternalDiffEngine !== false ) {
393 # Diff via the shell
394 global $wgTmpDirectory;
395 $tempName1 = tempnam( $wgTmpDirectory, 'diff_' );
396 $tempName2 = tempnam( $wgTmpDirectory, 'diff_' );
397
398 $tempFile1 = fopen( $tempName1, "w" );
399 if ( !$tempFile1 ) {
400 wfProfileOut( $fname );
401 return false;
402 }
403 $tempFile2 = fopen( $tempName2, "w" );
404 if ( !$tempFile2 ) {
405 wfProfileOut( $fname );
406 return false;
407 }
408 fwrite( $tempFile1, $otext );
409 fwrite( $tempFile2, $ntext );
410 fclose( $tempFile1 );
411 fclose( $tempFile2 );
412 $cmd = wfEscapeShellArg( $wgExternalDiffEngine, $tempName1, $tempName2 );
413 wfProfileIn( "$fname-shellexec" );
414 $difftext = wfShellExec( $cmd );
415 wfProfileOut( "$fname-shellexec" );
416 unlink( $tempName1 );
417 unlink( $tempName2 );
418 return $difftext;
419 }
420
421 # Native PHP diff
422 $ota = explode( "\n", $wgContLang->segmentForDiff( $otext ) );
423 $nta = explode( "\n", $wgContLang->segmentForDiff( $ntext ) );
424 $diffs = new Diff( $ota, $nta );
425 $formatter = new TableDiffFormatter();
426 return $wgContLang->unsegmentForDiff( $formatter->format( $diffs ) );
427 }
428
429
430 /**
431 * Replace line numbers with the text in the user's language
432 */
433 function localiseLineNumbers( $text ) {
434 return preg_replace_callback( '/<!--LINE (\d+)-->/',
435 array( &$this, 'localiseLineNumbersCb' ), $text );
436 }
437
438 function localiseLineNumbersCb( $matches ) {
439 global $wgLang;
440 return wfMsgExt( 'lineno', array('parseinline'), $wgLang->formatNum( $matches[1] ) );
441 }
442
443
444 /**
445 * If there are revisions between the ones being compared, return a note saying so.
446 */
447 function getMultiNotice() {
448 if ( !is_object($this->mOldRev) || !is_object($this->mNewRev) )
449 return '';
450
451 if( !$this->mOldPage->equals( $this->mNewPage ) ) {
452 // Comparing two different pages? Count would be meaningless.
453 return '';
454 }
455
456 $oldid = $this->mOldRev->getId();
457 $newid = $this->mNewRev->getId();
458 if ( $oldid > $newid ) {
459 $tmp = $oldid; $oldid = $newid; $newid = $tmp;
460 }
461
462 $n = $this->mTitle->countRevisionsBetween( $oldid, $newid );
463 if ( !$n )
464 return '';
465
466 return wfMsgExt( 'diff-multi', array( 'parseinline' ), $n );
467 }
468
469
470 /**
471 * Add the header to a diff body
472 */
473 function addHeader( $diff, $otitle, $ntitle, $multi = '' ) {
474 $header = "
475 <table border='0' width='98%' cellpadding='0' cellspacing='4' class='diff'>
476 <tr>
477 <td colspan='2' width='50%' align='center' class='diff-otitle'>{$otitle}</td>
478 <td colspan='2' width='50%' align='center' class='diff-ntitle'>{$ntitle}</td>
479 </tr>
480 ";
481
482 if ( $multi != '' )
483 $header .= "<tr><td colspan='4' align='center' class='diff-multi'>{$multi}</td></tr>";
484
485 return $header . $diff . "</table>";
486 }
487
488 /**
489 * Use specified text instead of loading from the database
490 */
491 function setText( $oldText, $newText ) {
492 $this->mOldtext = $oldText;
493 $this->mNewtext = $newText;
494 $this->mTextLoaded = 2;
495 }
496
497 /**
498 * Load revision metadata for the specified articles. If newid is 0, then compare
499 * the old article in oldid to the current article; if oldid is 0, then
500 * compare the current article to the immediately previous one (ignoring the
501 * value of newid).
502 *
503 * If oldid is false, leave the corresponding revision object set
504 * to false. This is impossible via ordinary user input, and is provided for
505 * API convenience.
506 */
507 function loadRevisionData() {
508 global $wgLang;
509 if ( $this->mRevisionsLoaded ) {
510 return true;
511 } else {
512 // Whether it succeeds or fails, we don't want to try again
513 $this->mRevisionsLoaded = true;
514 }
515
516 // Load the new revision object
517 if( $this->mNewid ) {
518 $this->mNewRev = Revision::newFromId( $this->mNewid );
519 } else {
520 $this->mNewRev = Revision::newFromTitle( $this->mTitle );
521 }
522
523 if( is_null( $this->mNewRev ) ) {
524 return false;
525 }
526
527 // Set assorted variables
528 $timestamp = $wgLang->timeanddate( $this->mNewRev->getTimestamp(), true );
529 $this->mNewPage = $this->mNewRev->getTitle();
530 if( $this->mNewRev->isCurrent() ) {
531 $newLink = $this->mNewPage->escapeLocalUrl();
532 $this->mPagetitle = htmlspecialchars( wfMsg( 'currentrev' ) );
533 $newEdit = $this->mNewPage->escapeLocalUrl( 'action=edit' );
534 $newUndo = $this->mNewPage->escapeLocalUrl( 'action=edit&undo=' . $this->mNewid );
535
536 $this->mNewtitle = "<a href='$newLink'>{$this->mPagetitle}</a> ($timestamp)"
537 . " (<a href='$newEdit'>" . htmlspecialchars( wfMsg( 'editold' ) ) . "</a>)"
538 . " (<a href='$newUndo'>" . htmlspecialchars( wfMsg( 'editundo' ) ) . "</a>)";
539
540 } else {
541 $newLink = $this->mNewPage->escapeLocalUrl( 'oldid=' . $this->mNewid );
542 $newEdit = $this->mNewPage->escapeLocalUrl( 'action=edit&oldid=' . $this->mNewid );
543 $newUndo = $this->mNewPage->escapeLocalUrl( 'action=edit&undo=' . $this->mNewid );
544 $this->mPagetitle = htmlspecialchars( wfMsg( 'revisionasof', $timestamp ) );
545
546 $this->mNewtitle = "<a href='$newLink'>{$this->mPagetitle}</a>"
547 . " (<a href='$newEdit'>" . htmlspecialchars( wfMsg( 'editold' ) ) . "</a>)"
548 . " (<a href='$newUndo'>" . htmlspecialchars( wfMsg( 'editundo' ) ) . "</a>)";
549 }
550
551 // Load the old revision object
552 $this->mOldRev = false;
553 if( $this->mOldid ) {
554 $this->mOldRev = Revision::newFromId( $this->mOldid );
555 } elseif ( $this->mOldid === 0 ) {
556 $rev = $this->mNewRev->getPrevious();
557 if( $rev ) {
558 $this->mOldid = $rev->getId();
559 $this->mOldRev = $rev;
560 } else {
561 // No previous revision; mark to show as first-version only.
562 $this->mOldid = false;
563 $this->mOldRev = false;
564 }
565 }/* elseif ( $this->mOldid === false ) leave mOldRev false; */
566
567 if( is_null( $this->mOldRev ) ) {
568 return false;
569 }
570
571 if ( $this->mOldRev ) {
572 $this->mOldPage = $this->mOldRev->getTitle();
573
574 $t = $wgLang->timeanddate( $this->mOldRev->getTimestamp(), true );
575 $oldLink = $this->mOldPage->escapeLocalUrl( 'oldid=' . $this->mOldid );
576 $oldEdit = $this->mOldPage->escapeLocalUrl( 'action=edit&oldid=' . $this->mOldid );
577 $this->mOldtitle = "<a href='$oldLink'>" . htmlspecialchars( wfMsg( 'revisionasof', $t ) )
578 . "</a> (<a href='$oldEdit'>" . htmlspecialchars( wfMsg( 'editold' ) ) . "</a>)";
579 }
580
581 return true;
582 }
583
584 /**
585 * Load the text of the revisions, as well as revision data.
586 */
587 function loadText() {
588 if ( $this->mTextLoaded == 2 ) {
589 return true;
590 } else {
591 // Whether it succeeds or fails, we don't want to try again
592 $this->mTextLoaded = 2;
593 }
594
595 if ( !$this->loadRevisionData() ) {
596 return false;
597 }
598 if ( $this->mOldRev ) {
599 // FIXME: permission tests
600 $this->mOldtext = $this->mOldRev->getText();
601 if ( $this->mOldtext === false ) {
602 return false;
603 }
604 }
605 if ( $this->mNewRev ) {
606 $this->mNewtext = $this->mNewRev->getText();
607 if ( $this->mNewtext === false ) {
608 return false;
609 }
610 }
611 return true;
612 }
613
614 /**
615 * Load the text of the new revision, not the old one
616 */
617 function loadNewText() {
618 if ( $this->mTextLoaded >= 1 ) {
619 return true;
620 } else {
621 $this->mTextLoaded = 1;
622 }
623 if ( !$this->loadRevisionData() ) {
624 return false;
625 }
626 $this->mNewtext = $this->mNewRev->getText();
627 return true;
628 }
629
630
631 }
632
633 // A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
634 //
635 // Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
636 // You may copy this code freely under the conditions of the GPL.
637 //
638
639 define('USE_ASSERTS', function_exists('assert'));
640
641 /**
642 * @todo document
643 * @private
644 * @package MediaWiki
645 * @subpackage DifferenceEngine
646 */
647 class _DiffOp {
648 var $type;
649 var $orig;
650 var $closing;
651
652 function reverse() {
653 trigger_error('pure virtual', E_USER_ERROR);
654 }
655
656 function norig() {
657 return $this->orig ? sizeof($this->orig) : 0;
658 }
659
660 function nclosing() {
661 return $this->closing ? sizeof($this->closing) : 0;
662 }
663 }
664
665 /**
666 * @todo document
667 * @private
668 * @package MediaWiki
669 * @subpackage DifferenceEngine
670 */
671 class _DiffOp_Copy extends _DiffOp {
672 var $type = 'copy';
673
674 function _DiffOp_Copy ($orig, $closing = false) {
675 if (!is_array($closing))
676 $closing = $orig;
677 $this->orig = $orig;
678 $this->closing = $closing;
679 }
680
681 function reverse() {
682 return new _DiffOp_Copy($this->closing, $this->orig);
683 }
684 }
685
686 /**
687 * @todo document
688 * @private
689 * @package MediaWiki
690 * @subpackage DifferenceEngine
691 */
692 class _DiffOp_Delete extends _DiffOp {
693 var $type = 'delete';
694
695 function _DiffOp_Delete ($lines) {
696 $this->orig = $lines;
697 $this->closing = false;
698 }
699
700 function reverse() {
701 return new _DiffOp_Add($this->orig);
702 }
703 }
704
705 /**
706 * @todo document
707 * @private
708 * @package MediaWiki
709 * @subpackage DifferenceEngine
710 */
711 class _DiffOp_Add extends _DiffOp {
712 var $type = 'add';
713
714 function _DiffOp_Add ($lines) {
715 $this->closing = $lines;
716 $this->orig = false;
717 }
718
719 function reverse() {
720 return new _DiffOp_Delete($this->closing);
721 }
722 }
723
724 /**
725 * @todo document
726 * @private
727 * @package MediaWiki
728 * @subpackage DifferenceEngine
729 */
730 class _DiffOp_Change extends _DiffOp {
731 var $type = 'change';
732
733 function _DiffOp_Change ($orig, $closing) {
734 $this->orig = $orig;
735 $this->closing = $closing;
736 }
737
738 function reverse() {
739 return new _DiffOp_Change($this->closing, $this->orig);
740 }
741 }
742
743
744 /**
745 * Class used internally by Diff to actually compute the diffs.
746 *
747 * The algorithm used here is mostly lifted from the perl module
748 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
749 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
750 *
751 * More ideas are taken from:
752 * http://www.ics.uci.edu/~eppstein/161/960229.html
753 *
754 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
755 * diffutils-2.7, which can be found at:
756 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
757 *
758 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
759 * are my own.
760 *
761 * Line length limits for robustness added by Tim Starling, 2005-08-31
762 *
763 * @author Geoffrey T. Dairiki, Tim Starling
764 * @private
765 * @package MediaWiki
766 * @subpackage DifferenceEngine
767 */
768 class _DiffEngine
769 {
770 const MAX_XREF_LENGTH = 10000;
771
772 function diff ($from_lines, $to_lines) {
773 $fname = '_DiffEngine::diff';
774 wfProfileIn( $fname );
775
776 $n_from = sizeof($from_lines);
777 $n_to = sizeof($to_lines);
778
779 $this->xchanged = $this->ychanged = array();
780 $this->xv = $this->yv = array();
781 $this->xind = $this->yind = array();
782 unset($this->seq);
783 unset($this->in_seq);
784 unset($this->lcs);
785
786 // Skip leading common lines.
787 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
788 if ($from_lines[$skip] !== $to_lines[$skip])
789 break;
790 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
791 }
792 // Skip trailing common lines.
793 $xi = $n_from; $yi = $n_to;
794 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
795 if ($from_lines[$xi] !== $to_lines[$yi])
796 break;
797 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
798 }
799
800 // Ignore lines which do not exist in both files.
801 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
802 $xhash[$this->_line_hash($from_lines[$xi])] = 1;
803 }
804
805 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
806 $line = $to_lines[$yi];
807 if ( ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) )
808 continue;
809 $yhash[$this->_line_hash($line)] = 1;
810 $this->yv[] = $line;
811 $this->yind[] = $yi;
812 }
813 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
814 $line = $from_lines[$xi];
815 if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) )
816 continue;
817 $this->xv[] = $line;
818 $this->xind[] = $xi;
819 }
820
821 // Find the LCS.
822 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
823
824 // Merge edits when possible
825 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
826 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
827
828 // Compute the edit operations.
829 $edits = array();
830 $xi = $yi = 0;
831 while ($xi < $n_from || $yi < $n_to) {
832 USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
833 USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
834
835 // Skip matching "snake".
836 $copy = array();
837 while ( $xi < $n_from && $yi < $n_to
838 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
839 $copy[] = $from_lines[$xi++];
840 ++$yi;
841 }
842 if ($copy)
843 $edits[] = new _DiffOp_Copy($copy);
844
845 // Find deletes & adds.
846 $delete = array();
847 while ($xi < $n_from && $this->xchanged[$xi])
848 $delete[] = $from_lines[$xi++];
849
850 $add = array();
851 while ($yi < $n_to && $this->ychanged[$yi])
852 $add[] = $to_lines[$yi++];
853
854 if ($delete && $add)
855 $edits[] = new _DiffOp_Change($delete, $add);
856 elseif ($delete)
857 $edits[] = new _DiffOp_Delete($delete);
858 elseif ($add)
859 $edits[] = new _DiffOp_Add($add);
860 }
861 wfProfileOut( $fname );
862 return $edits;
863 }
864
865 /**
866 * Returns the whole line if it's small enough, or the MD5 hash otherwise
867 */
868 function _line_hash( $line ) {
869 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
870 return md5( $line );
871 } else {
872 return $line;
873 }
874 }
875
876
877 /* Divide the Largest Common Subsequence (LCS) of the sequences
878 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
879 * sized segments.
880 *
881 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
882 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
883 * sub sequences. The first sub-sequence is contained in [X0, X1),
884 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
885 * that (X0, Y0) == (XOFF, YOFF) and
886 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
887 *
888 * This function assumes that the first lines of the specified portions
889 * of the two files do not match, and likewise that the last lines do not
890 * match. The caller must trim matching lines from the beginning and end
891 * of the portions it is going to specify.
892 */
893 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
894 $fname = '_DiffEngine::_diag';
895 wfProfileIn( $fname );
896 $flip = false;
897
898 if ($xlim - $xoff > $ylim - $yoff) {
899 // Things seems faster (I'm not sure I understand why)
900 // when the shortest sequence in X.
901 $flip = true;
902 list ($xoff, $xlim, $yoff, $ylim)
903 = array( $yoff, $ylim, $xoff, $xlim);
904 }
905
906 if ($flip)
907 for ($i = $ylim - 1; $i >= $yoff; $i--)
908 $ymatches[$this->xv[$i]][] = $i;
909 else
910 for ($i = $ylim - 1; $i >= $yoff; $i--)
911 $ymatches[$this->yv[$i]][] = $i;
912
913 $this->lcs = 0;
914 $this->seq[0]= $yoff - 1;
915 $this->in_seq = array();
916 $ymids[0] = array();
917
918 $numer = $xlim - $xoff + $nchunks - 1;
919 $x = $xoff;
920 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
921 wfProfileIn( "$fname-chunk" );
922 if ($chunk > 0)
923 for ($i = 0; $i <= $this->lcs; $i++)
924 $ymids[$i][$chunk-1] = $this->seq[$i];
925
926 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
927 for ( ; $x < $x1; $x++) {
928 $line = $flip ? $this->yv[$x] : $this->xv[$x];
929 if (empty($ymatches[$line]))
930 continue;
931 $matches = $ymatches[$line];
932 reset($matches);
933 while (list ($junk, $y) = each($matches))
934 if (empty($this->in_seq[$y])) {
935 $k = $this->_lcs_pos($y);
936 USE_ASSERTS && assert($k > 0);
937 $ymids[$k] = $ymids[$k-1];
938 break;
939 }
940 while (list ( /* $junk */, $y) = each($matches)) {
941 if ($y > $this->seq[$k-1]) {
942 USE_ASSERTS && assert($y < $this->seq[$k]);
943 // Optimization: this is a common case:
944 // next match is just replacing previous match.
945 $this->in_seq[$this->seq[$k]] = false;
946 $this->seq[$k] = $y;
947 $this->in_seq[$y] = 1;
948 } else if (empty($this->in_seq[$y])) {
949 $k = $this->_lcs_pos($y);
950 USE_ASSERTS && assert($k > 0);
951 $ymids[$k] = $ymids[$k-1];
952 }
953 }
954 }
955 wfProfileOut( "$fname-chunk" );
956 }
957
958 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
959 $ymid = $ymids[$this->lcs];
960 for ($n = 0; $n < $nchunks - 1; $n++) {
961 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
962 $y1 = $ymid[$n] + 1;
963 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
964 }
965 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
966
967 wfProfileOut( $fname );
968 return array($this->lcs, $seps);
969 }
970
971 function _lcs_pos ($ypos) {
972 $fname = '_DiffEngine::_lcs_pos';
973 wfProfileIn( $fname );
974
975 $end = $this->lcs;
976 if ($end == 0 || $ypos > $this->seq[$end]) {
977 $this->seq[++$this->lcs] = $ypos;
978 $this->in_seq[$ypos] = 1;
979 wfProfileOut( $fname );
980 return $this->lcs;
981 }
982
983 $beg = 1;
984 while ($beg < $end) {
985 $mid = (int)(($beg + $end) / 2);
986 if ( $ypos > $this->seq[$mid] )
987 $beg = $mid + 1;
988 else
989 $end = $mid;
990 }
991
992 USE_ASSERTS && assert($ypos != $this->seq[$end]);
993
994 $this->in_seq[$this->seq[$end]] = false;
995 $this->seq[$end] = $ypos;
996 $this->in_seq[$ypos] = 1;
997 wfProfileOut( $fname );
998 return $end;
999 }
1000
1001 /* Find LCS of two sequences.
1002 *
1003 * The results are recorded in the vectors $this->{x,y}changed[], by
1004 * storing a 1 in the element for each line that is an insertion
1005 * or deletion (ie. is not in the LCS).
1006 *
1007 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
1008 *
1009 * Note that XLIM, YLIM are exclusive bounds.
1010 * All line numbers are origin-0 and discarded lines are not counted.
1011 */
1012 function _compareseq ($xoff, $xlim, $yoff, $ylim) {
1013 $fname = '_DiffEngine::_compareseq';
1014 wfProfileIn( $fname );
1015
1016 // Slide down the bottom initial diagonal.
1017 while ($xoff < $xlim && $yoff < $ylim
1018 && $this->xv[$xoff] == $this->yv[$yoff]) {
1019 ++$xoff;
1020 ++$yoff;
1021 }
1022
1023 // Slide up the top initial diagonal.
1024 while ($xlim > $xoff && $ylim > $yoff
1025 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
1026 --$xlim;
1027 --$ylim;
1028 }
1029
1030 if ($xoff == $xlim || $yoff == $ylim)
1031 $lcs = 0;
1032 else {
1033 // This is ad hoc but seems to work well.
1034 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
1035 //$nchunks = max(2,min(8,(int)$nchunks));
1036 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
1037 list ($lcs, $seps)
1038 = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
1039 }
1040
1041 if ($lcs == 0) {
1042 // X and Y sequences have no common subsequence:
1043 // mark all changed.
1044 while ($yoff < $ylim)
1045 $this->ychanged[$this->yind[$yoff++]] = 1;
1046 while ($xoff < $xlim)
1047 $this->xchanged[$this->xind[$xoff++]] = 1;
1048 } else {
1049 // Use the partitions to split this problem into subproblems.
1050 reset($seps);
1051 $pt1 = $seps[0];
1052 while ($pt2 = next($seps)) {
1053 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
1054 $pt1 = $pt2;
1055 }
1056 }
1057 wfProfileOut( $fname );
1058 }
1059
1060 /* Adjust inserts/deletes of identical lines to join changes
1061 * as much as possible.
1062 *
1063 * We do something when a run of changed lines include a
1064 * line at one end and has an excluded, identical line at the other.
1065 * We are free to choose which identical line is included.
1066 * `compareseq' usually chooses the one at the beginning,
1067 * but usually it is cleaner to consider the following identical line
1068 * to be the "change".
1069 *
1070 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
1071 */
1072 function _shift_boundaries ($lines, &$changed, $other_changed) {
1073 $fname = '_DiffEngine::_shift_boundaries';
1074 wfProfileIn( $fname );
1075 $i = 0;
1076 $j = 0;
1077
1078 USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
1079 $len = sizeof($lines);
1080 $other_len = sizeof($other_changed);
1081
1082 while (1) {
1083 /*
1084 * Scan forwards to find beginning of another run of changes.
1085 * Also keep track of the corresponding point in the other file.
1086 *
1087 * Throughout this code, $i and $j are adjusted together so that
1088 * the first $i elements of $changed and the first $j elements
1089 * of $other_changed both contain the same number of zeros
1090 * (unchanged lines).
1091 * Furthermore, $j is always kept so that $j == $other_len or
1092 * $other_changed[$j] == false.
1093 */
1094 while ($j < $other_len && $other_changed[$j])
1095 $j++;
1096
1097 while ($i < $len && ! $changed[$i]) {
1098 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
1099 $i++; $j++;
1100 while ($j < $other_len && $other_changed[$j])
1101 $j++;
1102 }
1103
1104 if ($i == $len)
1105 break;
1106
1107 $start = $i;
1108
1109 // Find the end of this run of changes.
1110 while (++$i < $len && $changed[$i])
1111 continue;
1112
1113 do {
1114 /*
1115 * Record the length of this run of changes, so that
1116 * we can later determine whether the run has grown.
1117 */
1118 $runlength = $i - $start;
1119
1120 /*
1121 * Move the changed region back, so long as the
1122 * previous unchanged line matches the last changed one.
1123 * This merges with previous changed regions.
1124 */
1125 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
1126 $changed[--$start] = 1;
1127 $changed[--$i] = false;
1128 while ($start > 0 && $changed[$start - 1])
1129 $start--;
1130 USE_ASSERTS && assert('$j > 0');
1131 while ($other_changed[--$j])
1132 continue;
1133 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
1134 }
1135
1136 /*
1137 * Set CORRESPONDING to the end of the changed run, at the last
1138 * point where it corresponds to a changed run in the other file.
1139 * CORRESPONDING == LEN means no such point has been found.
1140 */
1141 $corresponding = $j < $other_len ? $i : $len;
1142
1143 /*
1144 * Move the changed region forward, so long as the
1145 * first changed line matches the following unchanged one.
1146 * This merges with following changed regions.
1147 * Do this second, so that if there are no merges,
1148 * the changed region is moved forward as far as possible.
1149 */
1150 while ($i < $len && $lines[$start] == $lines[$i]) {
1151 $changed[$start++] = false;
1152 $changed[$i++] = 1;
1153 while ($i < $len && $changed[$i])
1154 $i++;
1155
1156 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
1157 $j++;
1158 if ($j < $other_len && $other_changed[$j]) {
1159 $corresponding = $i;
1160 while ($j < $other_len && $other_changed[$j])
1161 $j++;
1162 }
1163 }
1164 } while ($runlength != $i - $start);
1165
1166 /*
1167 * If possible, move the fully-merged run of changes
1168 * back to a corresponding run in the other file.
1169 */
1170 while ($corresponding < $i) {
1171 $changed[--$start] = 1;
1172 $changed[--$i] = 0;
1173 USE_ASSERTS && assert('$j > 0');
1174 while ($other_changed[--$j])
1175 continue;
1176 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
1177 }
1178 }
1179 wfProfileOut( $fname );
1180 }
1181 }
1182
1183 /**
1184 * Class representing a 'diff' between two sequences of strings.
1185 * @todo document
1186 * @private
1187 * @package MediaWiki
1188 * @subpackage DifferenceEngine
1189 */
1190 class Diff
1191 {
1192 var $edits;
1193
1194 /**
1195 * Constructor.
1196 * Computes diff between sequences of strings.
1197 *
1198 * @param $from_lines array An array of strings.
1199 * (Typically these are lines from a file.)
1200 * @param $to_lines array An array of strings.
1201 */
1202 function Diff($from_lines, $to_lines) {
1203 $eng = new _DiffEngine;
1204 $this->edits = $eng->diff($from_lines, $to_lines);
1205 //$this->_check($from_lines, $to_lines);
1206 }
1207
1208 /**
1209 * Compute reversed Diff.
1210 *
1211 * SYNOPSIS:
1212 *
1213 * $diff = new Diff($lines1, $lines2);
1214 * $rev = $diff->reverse();
1215 * @return object A Diff object representing the inverse of the
1216 * original diff.
1217 */
1218 function reverse () {
1219 $rev = $this;
1220 $rev->edits = array();
1221 foreach ($this->edits as $edit) {
1222 $rev->edits[] = $edit->reverse();
1223 }
1224 return $rev;
1225 }
1226
1227 /**
1228 * Check for empty diff.
1229 *
1230 * @return bool True iff two sequences were identical.
1231 */
1232 function isEmpty () {
1233 foreach ($this->edits as $edit) {
1234 if ($edit->type != 'copy')
1235 return false;
1236 }
1237 return true;
1238 }
1239
1240 /**
1241 * Compute the length of the Longest Common Subsequence (LCS).
1242 *
1243 * This is mostly for diagnostic purposed.
1244 *
1245 * @return int The length of the LCS.
1246 */
1247 function lcs () {
1248 $lcs = 0;
1249 foreach ($this->edits as $edit) {
1250 if ($edit->type == 'copy')
1251 $lcs += sizeof($edit->orig);
1252 }
1253 return $lcs;
1254 }
1255
1256 /**
1257 * Get the original set of lines.
1258 *
1259 * This reconstructs the $from_lines parameter passed to the
1260 * constructor.
1261 *
1262 * @return array The original sequence of strings.
1263 */
1264 function orig() {
1265 $lines = array();
1266
1267 foreach ($this->edits as $edit) {
1268 if ($edit->orig)
1269 array_splice($lines, sizeof($lines), 0, $edit->orig);
1270 }
1271 return $lines;
1272 }
1273
1274 /**
1275 * Get the closing set of lines.
1276 *
1277 * This reconstructs the $to_lines parameter passed to the
1278 * constructor.
1279 *
1280 * @return array The sequence of strings.
1281 */
1282 function closing() {
1283 $lines = array();
1284
1285 foreach ($this->edits as $edit) {
1286 if ($edit->closing)
1287 array_splice($lines, sizeof($lines), 0, $edit->closing);
1288 }
1289 return $lines;
1290 }
1291
1292 /**
1293 * Check a Diff for validity.
1294 *
1295 * This is here only for debugging purposes.
1296 */
1297 function _check ($from_lines, $to_lines) {
1298 $fname = 'Diff::_check';
1299 wfProfileIn( $fname );
1300 if (serialize($from_lines) != serialize($this->orig()))
1301 trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
1302 if (serialize($to_lines) != serialize($this->closing()))
1303 trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
1304
1305 $rev = $this->reverse();
1306 if (serialize($to_lines) != serialize($rev->orig()))
1307 trigger_error("Reversed original doesn't match", E_USER_ERROR);
1308 if (serialize($from_lines) != serialize($rev->closing()))
1309 trigger_error("Reversed closing doesn't match", E_USER_ERROR);
1310
1311
1312 $prevtype = 'none';
1313 foreach ($this->edits as $edit) {
1314 if ( $prevtype == $edit->type )
1315 trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
1316 $prevtype = $edit->type;
1317 }
1318
1319 $lcs = $this->lcs();
1320 trigger_error('Diff okay: LCS = '.$lcs, E_USER_NOTICE);
1321 wfProfileOut( $fname );
1322 }
1323 }
1324
1325 /**
1326 * FIXME: bad name.
1327 * @todo document
1328 * @private
1329 * @package MediaWiki
1330 * @subpackage DifferenceEngine
1331 */
1332 class MappedDiff extends Diff
1333 {
1334 /**
1335 * Constructor.
1336 *
1337 * Computes diff between sequences of strings.
1338 *
1339 * This can be used to compute things like
1340 * case-insensitve diffs, or diffs which ignore
1341 * changes in white-space.
1342 *
1343 * @param $from_lines array An array of strings.
1344 * (Typically these are lines from a file.)
1345 *
1346 * @param $to_lines array An array of strings.
1347 *
1348 * @param $mapped_from_lines array This array should
1349 * have the same size number of elements as $from_lines.
1350 * The elements in $mapped_from_lines and
1351 * $mapped_to_lines are what is actually compared
1352 * when computing the diff.
1353 *
1354 * @param $mapped_to_lines array This array should
1355 * have the same number of elements as $to_lines.
1356 */
1357 function MappedDiff($from_lines, $to_lines,
1358 $mapped_from_lines, $mapped_to_lines) {
1359 $fname = 'MappedDiff::MappedDiff';
1360 wfProfileIn( $fname );
1361
1362 assert(sizeof($from_lines) == sizeof($mapped_from_lines));
1363 assert(sizeof($to_lines) == sizeof($mapped_to_lines));
1364
1365 $this->Diff($mapped_from_lines, $mapped_to_lines);
1366
1367 $xi = $yi = 0;
1368 for ($i = 0; $i < sizeof($this->edits); $i++) {
1369 $orig = &$this->edits[$i]->orig;
1370 if (is_array($orig)) {
1371 $orig = array_slice($from_lines, $xi, sizeof($orig));
1372 $xi += sizeof($orig);
1373 }
1374
1375 $closing = &$this->edits[$i]->closing;
1376 if (is_array($closing)) {
1377 $closing = array_slice($to_lines, $yi, sizeof($closing));
1378 $yi += sizeof($closing);
1379 }
1380 }
1381 wfProfileOut( $fname );
1382 }
1383 }
1384
1385 /**
1386 * A class to format Diffs
1387 *
1388 * This class formats the diff in classic diff format.
1389 * It is intended that this class be customized via inheritance,
1390 * to obtain fancier outputs.
1391 * @todo document
1392 * @private
1393 * @package MediaWiki
1394 * @subpackage DifferenceEngine
1395 */
1396 class DiffFormatter
1397 {
1398 /**
1399 * Number of leading context "lines" to preserve.
1400 *
1401 * This should be left at zero for this class, but subclasses
1402 * may want to set this to other values.
1403 */
1404 var $leading_context_lines = 0;
1405
1406 /**
1407 * Number of trailing context "lines" to preserve.
1408 *
1409 * This should be left at zero for this class, but subclasses
1410 * may want to set this to other values.
1411 */
1412 var $trailing_context_lines = 0;
1413
1414 /**
1415 * Format a diff.
1416 *
1417 * @param $diff object A Diff object.
1418 * @return string The formatted output.
1419 */
1420 function format($diff) {
1421 $fname = 'DiffFormatter::format';
1422 wfProfileIn( $fname );
1423
1424 $xi = $yi = 1;
1425 $block = false;
1426 $context = array();
1427
1428 $nlead = $this->leading_context_lines;
1429 $ntrail = $this->trailing_context_lines;
1430
1431 $this->_start_diff();
1432
1433 foreach ($diff->edits as $edit) {
1434 if ($edit->type == 'copy') {
1435 if (is_array($block)) {
1436 if (sizeof($edit->orig) <= $nlead + $ntrail) {
1437 $block[] = $edit;
1438 }
1439 else{
1440 if ($ntrail) {
1441 $context = array_slice($edit->orig, 0, $ntrail);
1442 $block[] = new _DiffOp_Copy($context);
1443 }
1444 $this->_block($x0, $ntrail + $xi - $x0,
1445 $y0, $ntrail + $yi - $y0,
1446 $block);
1447 $block = false;
1448 }
1449 }
1450 $context = $edit->orig;
1451 }
1452 else {
1453 if (! is_array($block)) {
1454 $context = array_slice($context, sizeof($context) - $nlead);
1455 $x0 = $xi - sizeof($context);
1456 $y0 = $yi - sizeof($context);
1457 $block = array();
1458 if ($context)
1459 $block[] = new _DiffOp_Copy($context);
1460 }
1461 $block[] = $edit;
1462 }
1463
1464 if ($edit->orig)
1465 $xi += sizeof($edit->orig);
1466 if ($edit->closing)
1467 $yi += sizeof($edit->closing);
1468 }
1469
1470 if (is_array($block))
1471 $this->_block($x0, $xi - $x0,
1472 $y0, $yi - $y0,
1473 $block);
1474
1475 $end = $this->_end_diff();
1476 wfProfileOut( $fname );
1477 return $end;
1478 }
1479
1480 function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
1481 $fname = 'DiffFormatter::_block';
1482 wfProfileIn( $fname );
1483 $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
1484 foreach ($edits as $edit) {
1485 if ($edit->type == 'copy')
1486 $this->_context($edit->orig);
1487 elseif ($edit->type == 'add')
1488 $this->_added($edit->closing);
1489 elseif ($edit->type == 'delete')
1490 $this->_deleted($edit->orig);
1491 elseif ($edit->type == 'change')
1492 $this->_changed($edit->orig, $edit->closing);
1493 else
1494 trigger_error('Unknown edit type', E_USER_ERROR);
1495 }
1496 $this->_end_block();
1497 wfProfileOut( $fname );
1498 }
1499
1500 function _start_diff() {
1501 ob_start();
1502 }
1503
1504 function _end_diff() {
1505 $val = ob_get_contents();
1506 ob_end_clean();
1507 return $val;
1508 }
1509
1510 function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1511 if ($xlen > 1)
1512 $xbeg .= "," . ($xbeg + $xlen - 1);
1513 if ($ylen > 1)
1514 $ybeg .= "," . ($ybeg + $ylen - 1);
1515
1516 return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
1517 }
1518
1519 function _start_block($header) {
1520 echo $header;
1521 }
1522
1523 function _end_block() {
1524 }
1525
1526 function _lines($lines, $prefix = ' ') {
1527 foreach ($lines as $line)
1528 echo "$prefix $line\n";
1529 }
1530
1531 function _context($lines) {
1532 $this->_lines($lines);
1533 }
1534
1535 function _added($lines) {
1536 $this->_lines($lines, '>');
1537 }
1538 function _deleted($lines) {
1539 $this->_lines($lines, '<');
1540 }
1541
1542 function _changed($orig, $closing) {
1543 $this->_deleted($orig);
1544 echo "---\n";
1545 $this->_added($closing);
1546 }
1547 }
1548
1549
1550 /**
1551 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
1552 *
1553 */
1554
1555 define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
1556
1557 /**
1558 * @todo document
1559 * @private
1560 * @package MediaWiki
1561 * @subpackage DifferenceEngine
1562 */
1563 class _HWLDF_WordAccumulator {
1564 function _HWLDF_WordAccumulator () {
1565 $this->_lines = array();
1566 $this->_line = '';
1567 $this->_group = '';
1568 $this->_tag = '';
1569 }
1570
1571 function _flushGroup ($new_tag) {
1572 if ($this->_group !== '') {
1573 if ($this->_tag == 'mark')
1574 $this->_line .= '<span class="diffchange">' .
1575 htmlspecialchars ( $this->_group ) . '</span>';
1576 else
1577 $this->_line .= htmlspecialchars ( $this->_group );
1578 }
1579 $this->_group = '';
1580 $this->_tag = $new_tag;
1581 }
1582
1583 function _flushLine ($new_tag) {
1584 $this->_flushGroup($new_tag);
1585 if ($this->_line != '')
1586 array_push ( $this->_lines, $this->_line );
1587 else
1588 # make empty lines visible by inserting an NBSP
1589 array_push ( $this->_lines, NBSP );
1590 $this->_line = '';
1591 }
1592
1593 function addWords ($words, $tag = '') {
1594 if ($tag != $this->_tag)
1595 $this->_flushGroup($tag);
1596
1597 foreach ($words as $word) {
1598 // new-line should only come as first char of word.
1599 if ($word == '')
1600 continue;
1601 if ($word[0] == "\n") {
1602 $this->_flushLine($tag);
1603 $word = substr($word, 1);
1604 }
1605 assert(!strstr($word, "\n"));
1606 $this->_group .= $word;
1607 }
1608 }
1609
1610 function getLines() {
1611 $this->_flushLine('~done');
1612 return $this->_lines;
1613 }
1614 }
1615
1616 /**
1617 * @todo document
1618 * @private
1619 * @package MediaWiki
1620 * @subpackage DifferenceEngine
1621 */
1622 class WordLevelDiff extends MappedDiff
1623 {
1624 const MAX_LINE_LENGTH = 10000;
1625
1626 function WordLevelDiff ($orig_lines, $closing_lines) {
1627 $fname = 'WordLevelDiff::WordLevelDiff';
1628 wfProfileIn( $fname );
1629
1630 list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
1631 list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
1632
1633 $this->MappedDiff($orig_words, $closing_words,
1634 $orig_stripped, $closing_stripped);
1635 wfProfileOut( $fname );
1636 }
1637
1638 function _split($lines) {
1639 $fname = 'WordLevelDiff::_split';
1640 wfProfileIn( $fname );
1641
1642 $words = array();
1643 $stripped = array();
1644 $first = true;
1645 foreach ( $lines as $line ) {
1646 # If the line is too long, just pretend the entire line is one big word
1647 # This prevents resource exhaustion problems
1648 if ( $first ) {
1649 $first = false;
1650 } else {
1651 $words[] = "\n";
1652 $stripped[] = "\n";
1653 }
1654 if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
1655 $words[] = $line;
1656 $stripped[] = $line;
1657 } else {
1658 $m = array();
1659 if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1660 $line, $m))
1661 {
1662 $words = array_merge( $words, $m[0] );
1663 $stripped = array_merge( $stripped, $m[1] );
1664 }
1665 }
1666 }
1667 wfProfileOut( $fname );
1668 return array($words, $stripped);
1669 }
1670
1671 function orig () {
1672 $fname = 'WordLevelDiff::orig';
1673 wfProfileIn( $fname );
1674 $orig = new _HWLDF_WordAccumulator;
1675
1676 foreach ($this->edits as $edit) {
1677 if ($edit->type == 'copy')
1678 $orig->addWords($edit->orig);
1679 elseif ($edit->orig)
1680 $orig->addWords($edit->orig, 'mark');
1681 }
1682 $lines = $orig->getLines();
1683 wfProfileOut( $fname );
1684 return $lines;
1685 }
1686
1687 function closing () {
1688 $fname = 'WordLevelDiff::closing';
1689 wfProfileIn( $fname );
1690 $closing = new _HWLDF_WordAccumulator;
1691
1692 foreach ($this->edits as $edit) {
1693 if ($edit->type == 'copy')
1694 $closing->addWords($edit->closing);
1695 elseif ($edit->closing)
1696 $closing->addWords($edit->closing, 'mark');
1697 }
1698 $lines = $closing->getLines();
1699 wfProfileOut( $fname );
1700 return $lines;
1701 }
1702 }
1703
1704 /**
1705 * Wikipedia Table style diff formatter.
1706 * @todo document
1707 * @private
1708 * @package MediaWiki
1709 * @subpackage DifferenceEngine
1710 */
1711 class TableDiffFormatter extends DiffFormatter
1712 {
1713 function TableDiffFormatter() {
1714 $this->leading_context_lines = 2;
1715 $this->trailing_context_lines = 2;
1716 }
1717
1718 function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
1719 $r = '<tr><td colspan="2" align="left"><strong><!--LINE '.$xbeg."--></strong></td>\n" .
1720 '<td colspan="2" align="left"><strong><!--LINE '.$ybeg."--></strong></td></tr>\n";
1721 return $r;
1722 }
1723
1724 function _start_block( $header ) {
1725 echo $header;
1726 }
1727
1728 function _end_block() {
1729 }
1730
1731 function _lines( $lines, $prefix=' ', $color='white' ) {
1732 }
1733
1734 # HTML-escape parameter before calling this
1735 function addedLine( $line ) {
1736 return "<td>+</td><td class='diff-addedline'>{$line}</td>";
1737 }
1738
1739 # HTML-escape parameter before calling this
1740 function deletedLine( $line ) {
1741 return "<td>-</td><td class='diff-deletedline'>{$line}</td>";
1742 }
1743
1744 # HTML-escape parameter before calling this
1745 function contextLine( $line ) {
1746 return "<td> </td><td class='diff-context'>{$line}</td>";
1747 }
1748
1749 function emptyLine() {
1750 return '<td colspan="2">&nbsp;</td>';
1751 }
1752
1753 function _added( $lines ) {
1754 foreach ($lines as $line) {
1755 echo '<tr>' . $this->emptyLine() .
1756 $this->addedLine( htmlspecialchars ( $line ) ) . "</tr>\n";
1757 }
1758 }
1759
1760 function _deleted($lines) {
1761 foreach ($lines as $line) {
1762 echo '<tr>' . $this->deletedLine( htmlspecialchars ( $line ) ) .
1763 $this->emptyLine() . "</tr>\n";
1764 }
1765 }
1766
1767 function _context( $lines ) {
1768 foreach ($lines as $line) {
1769 echo '<tr>' .
1770 $this->contextLine( htmlspecialchars ( $line ) ) .
1771 $this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
1772 }
1773 }
1774
1775 function _changed( $orig, $closing ) {
1776 $fname = 'TableDiffFormatter::_changed';
1777 wfProfileIn( $fname );
1778
1779 $diff = new WordLevelDiff( $orig, $closing );
1780 $del = $diff->orig();
1781 $add = $diff->closing();
1782
1783 # Notice that WordLevelDiff returns HTML-escaped output.
1784 # Hence, we will be calling addedLine/deletedLine without HTML-escaping.
1785
1786 while ( $line = array_shift( $del ) ) {
1787 $aline = array_shift( $add );
1788 echo '<tr>' . $this->deletedLine( $line ) .
1789 $this->addedLine( $aline ) . "</tr>\n";
1790 }
1791 foreach ($add as $line) { # If any leftovers
1792 echo '<tr>' . $this->emptyLine() .
1793 $this->addedLine( $line ) . "</tr>\n";
1794 }
1795 wfProfileOut( $fname );
1796 }
1797 }
1798
1799 ?>