Merge "Fix return in HttpStatus::header"
[lhc/web/wiklou.git] / includes / CommentStore.php
1 <?php
2 /**
3 * Manage storage of comments in the database
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use Wikimedia\Rdbms\IDatabase;
24
25 /**
26 * CommentStore handles storage of comments (edit summaries, log reasons, etc)
27 * in the database.
28 * @since 1.30
29 */
class CommentStore {

	/** Maximum length of a comment. Longer comments will be truncated. */
	const MAX_COMMENT_LENGTH = 65535;

	/** Maximum length of serialized data. Longer data will result in an exception. */
	const MAX_DATA_LENGTH = 65535;

	/**
	 * Define fields that use temporary tables for transitional purposes
	 * @var array Keys are '$key', values are arrays with four fields:
	 *  - table: Temporary table name
	 *  - pk: Temporary table column referring to the main table's primary key
	 *  - field: Temporary table column referring to comment.comment_id
	 *  - joinPK: Main table's primary key
	 */
	protected static $tempTables = [
		'rev_comment' => [
			'table' => 'revision_comment_temp',
			'pk' => 'revcomment_rev',
			'field' => 'revcomment_comment_id',
			'joinPK' => 'rev_id',
		],
		'img_description' => [
			'table' => 'image_comment_temp',
			'pk' => 'imgcomment_name',
			'field' => 'imgcomment_description_id',
			'joinPK' => 'img_name',
		],
	];

	/**
	 * Fields that formerly used $tempTables
	 * @var array Key is '$key', value is the MediaWiki version in which it was
	 *  removed from $tempTables.
	 */
	protected static $formerTempTables = [];

	/** @var string Comment field key, e.g. "rev_comment" */
	protected $key;

	/** @var int One of the MIGRATION_* constants */
	protected $stage;

	/** @var array|null Cache for `self::getJoin()` */
	protected $joinCache = null;

	/** @var Language Language to use for comment truncation */
	protected $lang;

	/**
	 * The migration stage is read from $wgCommentTableSchemaMigrationStage
	 * once, at construction time.
	 *
	 * @param string $key A key such as "rev_comment" identifying the comment
	 *  field being fetched.
	 * @param Language $lang Language to use for comment truncation. Defaults
	 *  to $wgContLang.
	 */
	public function __construct( $key, Language $lang = null ) {
		global $wgCommentTableSchemaMigrationStage, $wgContLang;

		$this->key = $key;
		$this->stage = $wgCommentTableSchemaMigrationStage;
		$this->lang = $lang ?: $wgContLang;
	}

	/**
	 * Static constructor for easier chaining
	 * @param string $key A key such as "rev_comment" identifying the comment
	 *  field being fetched.
	 * @return CommentStore
	 */
	public static function newKey( $key ) {
		return new CommentStore( $key );
	}

	/**
	 * Get SELECT fields for the comment key
	 *
	 * Each resulting row should be passed to `self::getCommentLegacy()` to get the
	 * actual comment.
	 *
	 * @note Use of this method may require a subsequent database query to
	 *  actually fetch the comment. If possible, use `self::getJoin()` instead.
	 * @return string[] to include in the `$vars` to `IDatabase->select()`. All
	 *  fields are aliased, so `+` is safe to use.
	 */
	public function getFields() {
		$fields = [];
		if ( $this->stage === MIGRATION_OLD ) {
			// Old schema only: the comment text lives directly in the main table.
			$fields["{$this->key}_text"] = $this->key;
			$fields["{$this->key}_data"] = 'NULL';
			$fields["{$this->key}_cid"] = 'NULL';
		} else {
			if ( $this->stage < MIGRATION_NEW ) {
				// Still migrating: also select the old column so that
				// getCommentLegacy() can fall back to it.
				$fields["{$this->key}_old"] = $this->key;
			}
			if ( isset( self::$tempTables[$this->key] ) ) {
				// The comment ID is only reachable through the temporary table,
				// which is looked up later by the main table's primary key.
				$fields["{$this->key}_pk"] = self::$tempTables[$this->key]['joinPK'];
			} else {
				$fields["{$this->key}_id"] = "{$this->key}_id";
			}
		}
		return $fields;
	}

	/**
	 * Get SELECT fields and joins for the comment key
	 *
	 * Each resulting row should be passed to `self::getComment()` to get the
	 * actual comment. The result is computed once per instance and cached.
	 *
	 * @return array With three keys:
	 *   - tables: (string[]) to include in the `$table` to `IDatabase->select()`
	 *   - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
	 *   - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
	 *  All tables, fields, and joins are aliased, so `+` is safe to use.
	 */
	public function getJoin() {
		if ( $this->joinCache === null ) {
			$tables = [];
			$fields = [];
			$joins = [];

			if ( $this->stage === MIGRATION_OLD ) {
				$fields["{$this->key}_text"] = $this->key;
				$fields["{$this->key}_data"] = 'NULL';
				$fields["{$this->key}_cid"] = 'NULL';
			} else {
				// Before the migration is complete a row may not have a comment
				// table entry yet, so the join must not drop such rows.
				$join = $this->stage === MIGRATION_NEW ? 'JOIN' : 'LEFT JOIN';

				if ( isset( self::$tempTables[$this->key] ) ) {
					$t = self::$tempTables[$this->key];
					$alias = "temp_$this->key";
					$tables[$alias] = $t['table'];
					$joins[$alias] = [ $join, "{$alias}.{$t['pk']} = {$t['joinPK']}" ];
					$joinField = "{$alias}.{$t['field']}";
				} else {
					$joinField = "{$this->key}_id";
				}

				$alias = "comment_$this->key";
				$tables[$alias] = 'comment';
				$joins[$alias] = [ $join, "{$alias}.comment_id = {$joinField}" ];

				if ( $this->stage === MIGRATION_NEW ) {
					$fields["{$this->key}_text"] = "{$alias}.comment_text";
				} else {
					// Prefer the new table, fall back to the old column.
					$fields["{$this->key}_text"] = "COALESCE( {$alias}.comment_text, $this->key )";
				}
				$fields["{$this->key}_data"] = "{$alias}.comment_data";
				$fields["{$this->key}_cid"] = "{$alias}.comment_id";
			}

			$this->joinCache = [
				'tables' => $tables,
				'fields' => $fields,
				'joins' => $joins,
			];
		}

		return $this->joinCache;
	}

	/**
	 * Extract the comment from a row
	 *
	 * Shared implementation for getComment() and getCommentLegacy()
	 *
	 * Lookup order:
	 *  1. `{$key}_text` and `{$key}_data` present in the row: use them directly.
	 *  2. Stage is MIGRATION_OLD: use `$row[$key]` if $fallback allows it,
	 *     otherwise log a warning and return an empty comment.
	 *  3. Otherwise: use the `{$key}_pk` / `{$key}_id` field produced by
	 *     getFields() to query the comment table through $db, falling back to
	 *     `$row[$key]` (if $fallback) or to `{$key}_old` (while migrating).
	 *
	 * @param IDatabase|null $db Database handle for getCommentLegacy(), or null for getComment()
	 * @param object|array $row
	 * @param bool $fallback
	 * @return CommentStoreComment
	 * @throws InvalidArgumentException If the row lacks the needed fields, or
	 *  only has the fields for getCommentLegacy() while $db is null.
	 */
	private function getCommentInternal( IDatabase $db = null, $row, $fallback = false ) {
		$key = $this->key;
		$row = (array)$row;
		if ( array_key_exists( "{$key}_text", $row ) && array_key_exists( "{$key}_data", $row ) ) {
			$cid = isset( $row["{$key}_cid"] ) ? $row["{$key}_cid"] : null;
			$text = $row["{$key}_text"];
			$data = $row["{$key}_data"];
		} elseif ( $this->stage === MIGRATION_OLD ) {
			$cid = null;
			if ( $fallback && isset( $row[$key] ) ) {
				wfLogWarning( "Using deprecated fallback handling for comment $key" );
				$text = $row[$key];
			} else {
				wfLogWarning( "Missing {$key}_text and {$key}_data fields in row with MIGRATION_OLD" );
				$text = '';
			}
			$data = null;
		} else {
			// The row can only have come from getFields(): depending on whether
			// the key uses a temporary table, the ID is in "_pk" or "_id".
			$usesTempTable = isset( self::$tempTables[$key] );
			$idField = $usesTempTable ? "{$key}_pk" : "{$key}_id";

			if ( array_key_exists( $idField, $row ) ) {
				if ( !$db ) {
					throw new InvalidArgumentException(
						"\$row does not contain fields needed for comment $key and getComment(), but "
						. "does have fields for getCommentLegacy()"
					);
				}
				$id = $row[$idField];
				if ( $usesTempTable ) {
					$t = self::$tempTables[$key];
					$row2 = $db->selectRow(
						[ $t['table'], 'comment' ],
						[ 'comment_id', 'comment_text', 'comment_data' ],
						[ $t['pk'] => $id ],
						__METHOD__,
						[],
						[ 'comment' => [ 'JOIN', [ "comment_id = {$t['field']}" ] ] ]
					);
				} else {
					$row2 = $db->selectRow(
						'comment',
						[ 'comment_id', 'comment_text', 'comment_data' ],
						[ 'comment_id' => $id ],
						__METHOD__
					);
				}
			} elseif ( $fallback && isset( $row[$key] ) ) {
				wfLogWarning( "Using deprecated fallback handling for comment $key" );
				// comment_id must be present (as null): it is read unconditionally
				// below, and a missing property would raise a PHP notice.
				$row2 = (object)[
					'comment_id' => null,
					'comment_text' => $row[$key],
					'comment_data' => null,
				];
			} else {
				throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
			}

			if ( $row2 ) {
				$cid = $row2->comment_id;
				$text = $row2->comment_text;
				$data = $row2->comment_data;
			} elseif ( $this->stage < MIGRATION_NEW && array_key_exists( "{$key}_old", $row ) ) {
				$cid = null;
				$text = $row["{$key}_old"];
				$data = null;
			} else {
				// @codeCoverageIgnoreStart
				// Only reachable after a failed selectRow(), so $id is always set here.
				wfLogWarning( "Missing comment row for $key, id=$id" );
				$cid = null;
				$text = '';
				$data = null;
				// @codeCoverageIgnoreEnd
			}
		}

		$msg = null;
		if ( $data !== null ) {
			// Keep the raw value so the warning below can show what failed to decode.
			$rawData = $data;
			$data = FormatJson::decode( $rawData );
			if ( !is_object( $data ) ) {
				// @codeCoverageIgnoreStart
				wfLogWarning( "Invalid JSON object in comment: $rawData" );
				$data = null;
				// @codeCoverageIgnoreEnd
			} else {
				$data = (array)$data;
				if ( isset( $data['_message'] ) ) {
					$msg = self::decodeMessage( $data['_message'] )
						->setInterfaceMessageFlag( true );
				}
				if ( !empty( $data['_null'] ) ) {
					// '_null' marks that the caller's data was null and the JSON
					// object only exists to carry reserved keys.
					$data = null;
				} else {
					// Keys beginning with '_' are reserved; hide them from callers.
					foreach ( $data as $k => $v ) {
						if ( substr( $k, 0, 1 ) === '_' ) {
							unset( $data[$k] );
						}
					}
				}
			}
		}

		return new CommentStoreComment( $cid, $text, $msg, $data );
	}

	/**
	 * Extract the comment from a row
	 *
	 * Use `self::getJoin()` to ensure the row contains the needed data.
	 *
	 * If you need to fake a comment in a row for some reason, set fields
	 * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
	 *
	 * @param object|array $row Result row.
	 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
	 * @return CommentStoreComment
	 */
	public function getComment( $row, $fallback = false ) {
		return $this->getCommentInternal( null, $row, $fallback );
	}

	/**
	 * Extract the comment from a row, with legacy lookups.
	 *
	 * If `$row` might have been generated using `self::getFields()` rather
	 * than `self::getJoin()`, use this. Prefer `self::getComment()` if you
	 * know callers used `self::getJoin()` for the row fetch.
	 *
	 * If you need to fake a comment in a row for some reason, set fields
	 * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
	 *
	 * @param IDatabase $db Database handle to use for lookup
	 * @param object|array $row Result row.
	 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
	 * @return CommentStoreComment
	 */
	public function getCommentLegacy( IDatabase $db, $row, $fallback = false ) {
		return $this->getCommentInternal( $db, $row, $fallback );
	}

	/**
	 * Create a new CommentStoreComment, inserting it into the database if necessary
	 *
	 * If a comment is going to be passed to `self::insert()` or the like
	 * multiple times, it will be more efficient to pass a CommentStoreComment
	 * once rather than making `self::insert()` do it every time through.
	 *
	 * @note When passing a CommentStoreComment, this may set `$comment->id` if
	 *  it's not already set. If `$comment->id` is already set, it will not be
	 *  verified that the specified comment actually exists or that it
	 *  corresponds to the comment text, message, and/or data in the
	 *  CommentStoreComment.
	 * @param IDatabase $dbw Database handle to insert on. Unused if `$comment`
	 *  is a CommentStoreComment and `$comment->id` is set.
	 * @param string|Message|CommentStoreComment $comment Comment text or Message object, or
	 *  a CommentStoreComment.
	 * @param array|null $data Structured data to store. Keys beginning with '_' are reserved.
	 *  Ignored if $comment is a CommentStoreComment.
	 * @return CommentStoreComment
	 * @throws OverflowException If the serialized data exceeds MAX_DATA_LENGTH bytes
	 */
	public function createComment( IDatabase $dbw, $comment, array $data = null ) {
		$comment = CommentStoreComment::newUnsavedComment( $comment, $data );

		# Truncate comment in a Unicode-sensitive manner
		$comment->text = $this->lang->truncate( $comment->text, self::MAX_COMMENT_LENGTH );

		// The comment table is only written once the migration has started,
		// and only for comments that do not have an ID yet.
		if ( $this->stage > MIGRATION_OLD && !$comment->id ) {
			$dbData = $comment->data;
			if ( !$comment->message instanceof RawMessage ) {
				// A real message has to be stored under the reserved '_message'
				// key; '_null' records that the caller's own data was null.
				if ( $dbData === null ) {
					$dbData = [ '_null' => true ];
				}
				$dbData['_message'] = self::encodeMessage( $comment->message );
			}
			if ( $dbData !== null ) {
				$dbData = FormatJson::encode( (object)$dbData, false, FormatJson::ALL_OK );
				$len = strlen( $dbData );
				if ( $len > self::MAX_DATA_LENGTH ) {
					$max = self::MAX_DATA_LENGTH;
					throw new OverflowException( "Comment data is too long ($len bytes, maximum is $max)" );
				}
			}

			// Reuse an existing identical comment row if there is one. The hash
			// is part of the lookup condition alongside the exact text and data.
			$hash = self::hash( $comment->text, $dbData );
			$comment->id = $dbw->selectField(
				'comment',
				'comment_id',
				[
					'comment_hash' => $hash,
					'comment_text' => $comment->text,
					'comment_data' => $dbData,
				],
				__METHOD__
			);
			if ( !$comment->id ) {
				$dbw->insert(
					'comment',
					[
						'comment_hash' => $hash,
						'comment_text' => $comment->text,
						'comment_data' => $dbData,
					],
					__METHOD__
				);
				$comment->id = $dbw->insertId();
			}
		}

		return $comment;
	}

	/**
	 * Implementation for `self::insert()` and `self::insertWithTempTable()`
	 * @param IDatabase $dbw
	 * @param string|Message|CommentStoreComment $comment
	 * @param array|null $data
	 * @return array [ array $fields, callable|null $callback ] The callback is
	 *  only set when the key uses a temporary table and the new schema is
	 *  being written.
	 */
	private function insertInternal( IDatabase $dbw, $comment, $data ) {
		$fields = [];
		$callback = null;

		$comment = $this->createComment( $dbw, $comment, $data );

		if ( $this->stage <= MIGRATION_WRITE_BOTH ) {
			// Old column: the text is truncated to 255 here.
			$fields[$this->key] = $this->lang->truncate( $comment->text, 255 );
		}

		if ( $this->stage >= MIGRATION_WRITE_BOTH ) {
			if ( isset( self::$tempTables[$this->key] ) ) {
				// The temp table row references the main row's primary key, which
				// the caller only knows after its own insert; defer via callback.
				$t = self::$tempTables[$this->key];
				$func = __METHOD__;
				$commentId = $comment->id;
				$callback = function ( $id ) use ( $dbw, $commentId, $t, $func ) {
					$dbw->insert(
						$t['table'],
						[
							$t['pk'] => $id,
							$t['field'] => $commentId,
						],
						$func
					);
				};
			} else {
				$fields["{$this->key}_id"] = $comment->id;
			}
		}

		return [ $fields, $callback ];
	}

	/**
	 * Insert a comment in preparation for a row that references it
	 *
	 * @note It's recommended to include both the call to this method and the
	 *  row insert in the same transaction.
	 * @param IDatabase $dbw Database handle to insert on
	 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
	 * @param array|null $data As for `self::createComment()`
	 * @return array Fields for the insert or update
	 * @throws InvalidArgumentException If the key requires insertWithTempTable()
	 */
	public function insert( IDatabase $dbw, $comment, $data = null ) {
		if ( isset( self::$tempTables[$this->key] ) ) {
			throw new InvalidArgumentException( "Must use insertWithTempTable() for $this->key" );
		}

		list( $fields ) = $this->insertInternal( $dbw, $comment, $data );
		return $fields;
	}

	/**
	 * Insert a comment in a temporary table in preparation for a row that references it
	 *
	 * This is currently needed for "rev_comment" and "img_description". In the
	 * future that requirement will be removed.
	 *
	 * @note It's recommended to include both the call to this method and the
	 *  row insert in the same transaction.
	 * @param IDatabase $dbw Database handle to insert on
	 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
	 * @param array|null $data As for `self::createComment()`
	 * @return array Two values:
	 *  - array Fields for the insert or update
	 *  - callable Function to call when the primary key of the row being
	 *    inserted/updated is known. Pass it that primary key.
	 * @throws InvalidArgumentException If the key neither uses nor formerly
	 *  used a temporary table
	 */
	public function insertWithTempTable( IDatabase $dbw, $comment, $data = null ) {
		if ( isset( self::$formerTempTables[$this->key] ) ) {
			wfDeprecated( __METHOD__ . " for $this->key", self::$formerTempTables[$this->key] );
		} elseif ( !isset( self::$tempTables[$this->key] ) ) {
			throw new InvalidArgumentException( "Must use insert() for $this->key" );
		}

		list( $fields, $callback ) = $this->insertInternal( $dbw, $comment, $data );
		if ( !$callback ) {
			// Always hand back something callable so callers need not check.
			$callback = function () {
				// Do nothing.
			};
		}
		return [ $fields, $callback ];
	}

	/**
	 * Encode a Message as a PHP data structure
	 *
	 * The result is a list whose first element is the message key (or the
	 * list of keys to try, when there is more than one) followed by the
	 * parameters. Parameters that are themselves Messages are encoded
	 * recursively as `[ 'message' => ... ]`.
	 *
	 * @param Message $msg
	 * @return array
	 */
	protected static function encodeMessage( Message $msg ) {
		$key = count( $msg->getKeysToTry() ) > 1 ? $msg->getKeysToTry() : $msg->getKey();
		$params = $msg->getParams();
		foreach ( $params as &$param ) {
			if ( $param instanceof Message ) {
				$param = [
					'message' => self::encodeMessage( $param )
				];
			}
		}
		// Break the reference so the last element is not left aliased to $param.
		unset( $param );
		array_unshift( $params, $key );
		return $params;
	}

	/**
	 * Decode a message that was encoded by self::encodeMessage()
	 * @param array $data
	 * @return Message
	 */
	protected static function decodeMessage( $data ) {
		$key = array_shift( $data );
		foreach ( $data as &$param ) {
			if ( is_object( $param ) ) {
				$param = (array)$param;
			}
			if ( is_array( $param ) && count( $param ) === 1 && isset( $param['message'] ) ) {
				$param = self::decodeMessage( $param['message'] );
			}
		}
		// Break the reference before handing the array to the Message.
		unset( $param );
		return new Message( $key, $data );
	}

	/**
	 * Hashing function for comment storage
	 *
	 * The hash is the XOR of the CRC32 of the text and the CRC32 of the
	 * data cast to string (so null data hashes like the empty string).
	 *
	 * @param string $text Comment text
	 * @param string|null $data Comment data
	 * @return int 32-bit signed integer
	 */
	public static function hash( $text, $data ) {
		$hash = crc32( $text ) ^ crc32( (string)$data );

		// 64-bit PHP returns an unsigned CRC, change it to signed for
		// insertion into the database.
		if ( $hash >= 0x80000000 ) {
			$hash |= -1 << 32;
		}

		return $hash;
	}

}