Merge "rdbms: fix IDEA warnings in DatabaseMssql.php"
[lhc/web/wiklou.git] / includes / diff / TextSlotDiffRenderer.php
1 <?php
2 /**
3 * Renders a slot diff by doing a text diff on the native representation.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup DifferenceEngine
22 */
23
24 use MediaWiki\Shell\Shell;
25 use Wikimedia\Assert\Assert;
26
27 /**
28 * Renders a slot diff by doing a text diff on the native representation.
29 *
30 * If you want to use this without content objects (to call getTextDiff() on some
31 * non-content-related texts), obtain an instance with
32 * ContentHandler::getForModelID( CONTENT_MODEL_TEXT )
33 * ->getSlotDiffRenderer( RequestContext::getMain() )
34 *
35 * @ingroup DifferenceEngine
36 */
class TextSlotDiffRenderer extends SlotDiffRenderer {

	/** Use the PHP diff implementation (DiffEngine). */
	const ENGINE_PHP = 'php';

	/** Use the wikidiff2 PHP module. */
	const ENGINE_WIKIDIFF2 = 'wikidiff2';

	/** Use an external executable. */
	const ENGINE_EXTERNAL = 'external';

	/** @var IBufferingStatsdDataFactory|null Timing sink; no timing is reported while null. */
	private $statsdDataFactory;

	/** @var Language|null The language this content is in. */
	private $language;

	/**
	 * Number of paragraph moves the algorithm should attempt to detect.
	 * Only used with the wikidiff2 engine.
	 * @var int
	 * @see $wgWikiDiff2MovedParagraphDetectionCutoff
	 */
	private $wikiDiff2MovedParagraphDetectionCutoff = 0;

	/** @var string One of the ENGINE_* constants. */
	private $engine = self::ENGINE_PHP;

	/**
	 * @var string|null Path to an executable to be used as the diff engine.
	 *   Null unless the engine is ENGINE_EXTERNAL.
	 */
	private $externalEngine;

	/**
	 * Convenience helper to use getTextDiff without an instance.
	 *
	 * Obtains the slot diff renderer of the CONTENT_MODEL_TEXT content handler for the
	 * main request context and delegates to its getTextDiff() method.
	 *
	 * @param string $oldText
	 * @param string $newText
	 * @return string|false Whatever getTextDiff() returns for the two texts.
	 */
	public static function diff( $oldText, $newText ) {
		/** @var $slotDiffRenderer TextSlotDiffRenderer */
		$slotDiffRenderer = ContentHandler::getForModelID( CONTENT_MODEL_TEXT )
			->getSlotDiffRenderer( RequestContext::getMain() );
		return $slotDiffRenderer->getTextDiff( $oldText, $newText );
	}

	/**
	 * Set the stats factory that receives the 'diff_time' timing of each diff.
	 * @param IBufferingStatsdDataFactory $statsdDataFactory
	 */
	public function setStatsdDataFactory( IBufferingStatsdDataFactory $statsdDataFactory ) {
		$this->statsdDataFactory = $statsdDataFactory;
	}

	/**
	 * Set the language used to segment text for the PHP diff engine.
	 * @param Language $language
	 */
	public function setLanguage( Language $language ) {
		$this->language = $language;
	}

	/**
	 * @param int $cutoff
	 * @see $wgWikiDiff2MovedParagraphDetectionCutoff
	 */
	public function setWikiDiff2MovedParagraphDetectionCutoff( $cutoff ) {
		Assert::parameterType( 'integer', $cutoff, '$cutoff' );
		$this->wikiDiff2MovedParagraphDetectionCutoff = $cutoff;
	}

	/**
	 * Set which diff engine to use.
	 * @param string $type One of the ENGINE_* constants.
	 * @param string|null $executable Path to an external executable, only when type is
	 *   ENGINE_EXTERNAL. Must be null for every other engine.
	 */
	public function setEngine( $type, $executable = null ) {
		$engines = [ self::ENGINE_PHP, self::ENGINE_WIKIDIFF2, self::ENGINE_EXTERNAL ];
		Assert::parameter( in_array( $type, $engines, true ), '$type',
			'must be one of the TextSlotDiffRenderer::ENGINE_* constants' );
		if ( $type === self::ENGINE_EXTERNAL ) {
			Assert::parameter( is_string( $executable ) && is_executable( $executable ), '$executable',
				'must be a path to a valid executable' );
		} else {
			Assert::parameter( $executable === null, '$executable',
				'must not be set unless $type is ENGINE_EXTERNAL' );
		}
		$this->engine = $type;
		$this->externalEngine = $executable;
	}

	/** @inheritDoc */
	public function getDiff( Content $oldContent = null, Content $newContent = null ) {
		if ( !$oldContent && !$newContent ) {
			throw new InvalidArgumentException( '$oldContent and $newContent cannot both be null' );
		} elseif ( $oldContent && !( $oldContent instanceof TextContent ) ) {
			throw new InvalidArgumentException( __CLASS__ . ' does not handle ' . get_class( $oldContent ) );
		} elseif ( $newContent && !( $newContent instanceof TextContent ) ) {
			throw new InvalidArgumentException( __CLASS__ . ' does not handle ' . get_class( $newContent ) );
		}

		// A missing side is diffed against empty content of the other side's handler.
		if ( !$oldContent ) {
			$oldContent = $newContent->getContentHandler()->makeEmptyContent();
		} elseif ( !$newContent ) {
			$newContent = $oldContent->getContentHandler()->makeEmptyContent();
		}

		$oldText = $oldContent->serialize();
		$newText = $newContent->serialize();

		return $this->getTextDiff( $oldText, $newText );
	}

	/**
	 * Diff the text representations of two content objects (or just two pieces of text in general).
	 *
	 * Wraps getTextDiffInternal() with timing and, for large inputs, with a PoolCounter.
	 *
	 * @param string $oldText
	 * @param string $newText
	 * @return string|false The diff. When run directly (i.e. not through the PoolCounter) this is
	 *   false if the external engine could not write its temporary files.
	 */
	public function getTextDiff( $oldText, $newText ) {
		Assert::parameterType( 'string', $oldText, '$oldText' );
		Assert::parameterType( 'string', $newText, '$newText' );

		$diff = function () use ( $oldText, $newText ) {
			$time = microtime( true );

			$result = $this->getTextDiffInternal( $oldText, $newText );

			// Elapsed wall-clock time in whole milliseconds.
			$time = intval( ( microtime( true ) - $time ) * 1000 );
			if ( $this->statsdDataFactory ) {
				$this->statsdDataFactory->timing( 'diff_time', $time );
			}

			// TODO reimplement this using T142313
			/*
			// Log requests slower than 99th percentile
			if ( $time > 100 && $this->mOldPage && $this->mNewPage ) {
				wfDebugLog( 'diff',
					"$time ms diff: {$this->mOldid} -> {$this->mNewid} {$this->mNewPage}" );
			}
			*/

			return $result;
		};

		/**
		 * @param Status $status
		 * @throws FatalError
		 */
		$error = function ( $status ) {
			throw new FatalError( $status->getWikiText() );
		};

		// Use PoolCounter if the diff looks like it can be expensive
		if ( strlen( $oldText ) + strlen( $newText ) > 20000 ) {
			$work = new PoolCounterWorkViaCallback( 'diff',
				md5( $oldText ) . md5( $newText ),
				[ 'doWork' => $diff, 'error' => $error ]
			);
			return $work->execute();
		}

		return $diff();
	}

	/**
	 * Diff the text representations of two content objects (or just two pieces of text in general).
	 * This does the actual diffing, getTextDiff() wraps it with logging and resource limiting.
	 *
	 * Windows line endings ("\r\n") in both texts are converted to "\n" before diffing.
	 *
	 * @param string $oldText
	 * @param string $newText
	 * @return string|false The diff, or false if the external engine's temporary files
	 *   could not be created.
	 * @throws Exception If the external diff command exits with a non-zero code.
	 * @throws LogicException If the configured engine is not one of the ENGINE_* constants.
	 */
	protected function getTextDiffInternal( $oldText, $newText ) {
		// TODO move most of this into three parallel implementations of a text diff generator
		// class, choose which one to use via dependency injection

		$oldText = str_replace( "\r\n", "\n", $oldText );
		$newText = str_replace( "\r\n", "\n", $newText );

		if ( $this->engine === self::ENGINE_WIKIDIFF2 ) {
			return $this->diffWithWikiDiff2( $oldText, $newText );
		} elseif ( $this->engine === self::ENGINE_EXTERNAL ) {
			return $this->diffWithExternalEngine( $oldText, $newText );
		} elseif ( $this->engine === self::ENGINE_PHP ) {
			return $this->diffWithPhpEngine( $oldText, $newText );
		}
		throw new LogicException( 'Invalid engine: ' . $this->engine );
	}

	/**
	 * Diff two texts with the wikidiff2 PHP module.
	 *
	 * Better external diff engine, the 2 may some day be dropped.
	 * This one does the escaping and segmenting itself.
	 *
	 * @param string $oldText
	 * @param string $newText
	 * @return string Whatever wikidiff2_do_diff() returns.
	 */
	private function diffWithWikiDiff2( $oldText, $newText ) {
		$wikidiff2Version = phpversion( 'wikidiff2' );
		$supportsMoveDetection = $wikidiff2Version !== false
			&& version_compare( $wikidiff2Version, '1.5.0', '>=' );

		if ( $supportsMoveDetection ) {
			return wikidiff2_do_diff(
				$oldText,
				$newText,
				2,
				$this->wikiDiff2MovedParagraphDetectionCutoff
			);
		}

		// Don't pass the 4th parameter for compatibility with older versions of wikidiff2
		$text = wikidiff2_do_diff(
			$oldText,
			$newText,
			2
		);

		// Log a warning in case the configuration value is set to not silently ignore it
		if ( $this->wikiDiff2MovedParagraphDetectionCutoff > 0 ) {
			// Built by concatenation so the logged message stays on a single line.
			wfLogWarning( '$wgWikiDiff2MovedParagraphDetectionCutoff is set but has no ' .
				'effect since the used version of WikiDiff2 does not support it.' );
		}

		return $text;
	}

	/**
	 * Diff two texts by writing them to temporary files and running the external executable
	 * on them via the shell.
	 *
	 * The temporary files are removed on every exit path, including failures.
	 *
	 * @param string $oldText
	 * @param string $newText
	 * @return string|false Standard output of the command, or false if a temporary file
	 *   could not be created or opened for writing.
	 * @throws Exception If the command exits with a non-zero code.
	 */
	private function diffWithExternalEngine( $oldText, $newText ) {
		$tmpDir = wfTempDir();
		$tempNames = [];

		try {
			foreach ( [ $oldText, $newText ] as $text ) {
				$tempName = tempnam( $tmpDir, 'diff_' );
				if ( $tempName === false ) {
					return false;
				}
				// Register the file for cleanup before anything else can fail.
				$tempNames[] = $tempName;

				$tempFile = fopen( $tempName, 'w' );
				if ( !$tempFile ) {
					return false;
				}
				fwrite( $tempFile, $text );
				fclose( $tempFile );
			}

			$cmd = [ $this->externalEngine, $tempNames[0], $tempNames[1] ];
			$result = Shell::command( $cmd )
				->execute();
			$exitCode = $result->getExitCode();
			if ( $exitCode !== 0 ) {
				throw new Exception( "External diff command returned code {$exitCode}. Stderr: "
					. wfEscapeWikiText( $result->getStderr() )
				);
			}

			return $result->getStdout();
		} finally {
			// Runs on normal return, early return and exception alike.
			foreach ( $tempNames as $tempName ) {
				unlink( $tempName );
			}
		}
	}

	/**
	 * Diff two texts line by line with the PHP implementation (Diff + TableDiffFormatter).
	 *
	 * When a language is set, both texts are passed through its segmentForDiff() before
	 * diffing and the formatted result through unsegmentForDiff() afterwards.
	 *
	 * @param string $oldText
	 * @param string $newText
	 * @return string The formatted diff.
	 */
	private function diffWithPhpEngine( $oldText, $newText ) {
		if ( $this->language ) {
			$oldText = $this->language->segmentForDiff( $oldText );
			$newText = $this->language->segmentForDiff( $newText );
		}

		$oldLines = explode( "\n", $oldText );
		$newLines = explode( "\n", $newText );
		$formatter = new TableDiffFormatter();
		$difftext = $formatter->format( new Diff( $oldLines, $newLines ) );

		if ( $this->language ) {
			$difftext = $this->language->unsegmentForDiff( $difftext );
		}

		return $difftext;
	}

}