Merge "Skin: Make skins aware of their registered skin name"
[lhc/web/wiklou.git] / includes / CommentStore.php
1 <?php
2 /**
3 * Manage storage of comments in the database
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use Wikimedia\Rdbms\IDatabase;
24
/**
 * CommentStore handles storage of comments (edit summaries, log reasons, etc)
 * in the database.
 *
 * Depending on the configured migration stage (one of the MIGRATION_*
 * constants), a comment is read from and/or written to either the legacy
 * text column named by the key (e.g. "rev_comment") or the shared `comment`
 * table, which is referenced either directly through a "{$key}_id" column or
 * indirectly through one of the temporary link tables in self::$tempTables.
 *
 * @since 1.30
 */
class CommentStore {

	/**
	 * Maximum length of a comment in UTF-8 characters. Longer comments will be truncated.
	 * @note This must be at least 255 and not greater than floor( MAX_COMMENT_LENGTH / 4 ).
	 */
	const COMMENT_CHARACTER_LIMIT = 1000;

	/**
	 * Maximum length of a comment in bytes. Longer comments will be truncated.
	 * @note This value is determined by the size of the underlying database field,
	 *  currently BLOB in MySQL/MariaDB.
	 */
	const MAX_COMMENT_LENGTH = 65535;

	/**
	 * Maximum length of serialized data in bytes. Longer data will result in an exception.
	 * @note This value is determined by the size of the underlying database field,
	 *  currently BLOB in MySQL/MariaDB.
	 */
	const MAX_DATA_LENGTH = 65535;

	/**
	 * Define fields that use temporary tables for transitional purposes
	 * @var array Keys are '$key', values are arrays with four fields:
	 *  - table: Temporary table name
	 *  - pk: Temporary table column referring to the main table's primary key
	 *  - field: Temporary table column referring to comment.comment_id
	 *  - joinPK: Main table's primary key
	 */
	protected static $tempTables = [
		'rev_comment' => [
			'table' => 'revision_comment_temp',
			'pk' => 'revcomment_rev',
			'field' => 'revcomment_comment_id',
			'joinPK' => 'rev_id',
		],
		'img_description' => [
			'table' => 'image_comment_temp',
			'pk' => 'imgcomment_name',
			'field' => 'imgcomment_description_id',
			'joinPK' => 'img_name',
		],
	];

	/**
	 * Fields that formerly used $tempTables
	 * @var array Key is '$key', value is the MediaWiki version in which it was
	 *  removed from $tempTables.
	 */
	protected static $formerTempTables = [];

	/** @var string Comment field key, e.g. "rev_comment" */
	protected $key;

	/** @var int One of the MIGRATION_* constants */
	protected $stage;

	/** @var array|null Cache for `self::getJoin()` */
	protected $joinCache = null;

	/** @var Language Language to use for comment truncation */
	protected $lang;

	/**
	 * @param string $key A key such as "rev_comment" identifying the comment
	 *  field being fetched.
	 * @param Language $lang Language to use for comment truncation. Defaults
	 *  to $wgContLang.
	 */
	public function __construct( $key, Language $lang = null ) {
		global $wgCommentTableSchemaMigrationStage, $wgContLang;

		$this->key = $key;
		// The migration stage is captured once, at construction time.
		$this->stage = $wgCommentTableSchemaMigrationStage;
		$this->lang = $lang ?: $wgContLang;
	}

	/**
	 * Static constructor for easier chaining
	 * @param string $key A key such as "rev_comment" identifying the comment
	 *  field being fetched.
	 * @return CommentStore
	 */
	public static function newKey( $key ) {
		return new self( $key );
	}

	/**
	 * Get SELECT fields for the comment key
	 *
	 * Each resulting row should be passed to `self::getCommentLegacy()` to get the
	 * actual comment.
	 *
	 * @note Use of this method may require a subsequent database query to
	 *  actually fetch the comment. If possible, use `self::getJoin()` instead.
	 * @return string[] to include in the `$vars` to `IDatabase->select()`. All
	 *  fields are aliased, so `+` is safe to use.
	 */
	public function getFields() {
		$fields = [];
		if ( $this->stage === MIGRATION_OLD ) {
			// Only the legacy column exists; expose it under the same aliases
			// that getJoin() produces so the row can be consumed uniformly.
			$fields["{$this->key}_text"] = $this->key;
			$fields["{$this->key}_data"] = 'NULL';
			$fields["{$this->key}_cid"] = 'NULL';
		} else {
			if ( $this->stage < MIGRATION_NEW ) {
				// The legacy column is still selected as a fallback value.
				$fields["{$this->key}_old"] = $this->key;
			}
			if ( isset( self::$tempTables[$this->key] ) ) {
				// The comment ID lives in a link table; select the main table's
				// primary key so it can be looked up later.
				$fields["{$this->key}_pk"] = self::$tempTables[$this->key]['joinPK'];
			} else {
				$fields["{$this->key}_id"] = "{$this->key}_id";
			}
		}
		return $fields;
	}

	/**
	 * Get SELECT fields and joins for the comment key
	 *
	 * Each resulting row should be passed to `self::getComment()` to get the
	 * actual comment. The result is computed once per instance and cached.
	 *
	 * @return array With three keys:
	 *  - tables: (string[]) to include in the `$table` to `IDatabase->select()`
	 *  - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
	 *  - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
	 *  All tables, fields, and joins are aliased, so `+` is safe to use.
	 */
	public function getJoin() {
		if ( $this->joinCache === null ) {
			$tables = [];
			$fields = [];
			$joins = [];

			if ( $this->stage === MIGRATION_OLD ) {
				$fields["{$this->key}_text"] = $this->key;
				$fields["{$this->key}_data"] = 'NULL';
				$fields["{$this->key}_cid"] = 'NULL';
			} else {
				// Before MIGRATION_NEW a row may have no comment-table entry
				// yet, so a LEFT JOIN is used to keep such rows in the result.
				$join = $this->stage === MIGRATION_NEW ? 'JOIN' : 'LEFT JOIN';

				if ( isset( self::$tempTables[$this->key] ) ) {
					$t = self::$tempTables[$this->key];
					$alias = "temp_$this->key";
					$tables[$alias] = $t['table'];
					$joins[$alias] = [ $join, "{$alias}.{$t['pk']} = {$t['joinPK']}" ];
					$joinField = "{$alias}.{$t['field']}";
				} else {
					$joinField = "{$this->key}_id";
				}

				$alias = "comment_$this->key";
				$tables[$alias] = 'comment';
				$joins[$alias] = [ $join, "{$alias}.comment_id = {$joinField}" ];

				if ( $this->stage === MIGRATION_NEW ) {
					$fields["{$this->key}_text"] = "{$alias}.comment_text";
				} else {
					// Fall back to the legacy column when the LEFT JOIN found nothing.
					$fields["{$this->key}_text"] = "COALESCE( {$alias}.comment_text, $this->key )";
				}
				$fields["{$this->key}_data"] = "{$alias}.comment_data";
				$fields["{$this->key}_cid"] = "{$alias}.comment_id";
			}

			$this->joinCache = [
				'tables' => $tables,
				'fields' => $fields,
				'joins' => $joins,
			];
		}

		return $this->joinCache;
	}

	/**
	 * Extract the comment from a row
	 *
	 * Shared implementation for getComment() and getCommentLegacy()
	 *
	 * @param IDatabase|null $db Database handle for getCommentLegacy(), or null for getComment()
	 * @param object|array $row
	 * @param bool $fallback
	 * @return CommentStoreComment
	 * @throws InvalidArgumentException If $row lacks the fields needed to
	 *  resolve the comment (and no fallback applies), or if it only has the
	 *  getFields()-style fields but no database handle was supplied.
	 */
	private function getCommentInternal( IDatabase $db = null, $row, $fallback = false ) {
		$key = $this->key;
		$row = (array)$row;
		if ( array_key_exists( "{$key}_text", $row ) && array_key_exists( "{$key}_data", $row ) ) {
			// Row came from getJoin() (or was faked): everything is already present.
			$cid = isset( $row["{$key}_cid"] ) ? $row["{$key}_cid"] : null;
			$text = $row["{$key}_text"];
			$data = $row["{$key}_data"];
		} elseif ( $this->stage === MIGRATION_OLD ) {
			$cid = null;
			if ( $fallback && isset( $row[$key] ) ) {
				wfLogWarning( "Using deprecated fallback handling for comment $key" );
				$text = $row[$key];
			} else {
				wfLogWarning( "Missing {$key}_text and {$key}_data fields in row with MIGRATION_OLD" );
				$text = '';
			}
			$data = null;
		} else {
			if ( isset( self::$tempTables[$key] ) ) {
				if ( array_key_exists( "{$key}_pk", $row ) ) {
					if ( !$db ) {
						throw new InvalidArgumentException(
							"\$row does not contain fields needed for comment $key and getComment(), but "
							. "does have fields for getCommentLegacy()"
						);
					}
					$t = self::$tempTables[$key];
					$id = $row["{$key}_pk"];
					$row2 = $db->selectRow(
						[ $t['table'], 'comment' ],
						[ 'comment_id', 'comment_text', 'comment_data' ],
						[ $t['pk'] => $id ],
						__METHOD__,
						[],
						[ 'comment' => [ 'JOIN', [ "comment_id = {$t['field']}" ] ] ]
					);
				} elseif ( $fallback && isset( $row[$key] ) ) {
					wfLogWarning( "Using deprecated fallback handling for comment $key" );
					// comment_id must be present: it is read unconditionally below.
					$row2 = (object)[
						'comment_id' => null,
						'comment_text' => $row[$key],
						'comment_data' => null,
					];
				} else {
					throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
				}
			} else {
				if ( array_key_exists( "{$key}_id", $row ) ) {
					if ( !$db ) {
						throw new InvalidArgumentException(
							"\$row does not contain fields needed for comment $key and getComment(), but "
							. "does have fields for getCommentLegacy()"
						);
					}
					$id = $row["{$key}_id"];
					$row2 = $db->selectRow(
						'comment',
						[ 'comment_id', 'comment_text', 'comment_data' ],
						[ 'comment_id' => $id ],
						__METHOD__
					);
				} elseif ( $fallback && isset( $row[$key] ) ) {
					wfLogWarning( "Using deprecated fallback handling for comment $key" );
					// comment_id must be present: it is read unconditionally below.
					$row2 = (object)[
						'comment_id' => null,
						'comment_text' => $row[$key],
						'comment_data' => null,
					];
				} else {
					throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
				}
			}

			if ( $row2 ) {
				$cid = $row2->comment_id;
				$text = $row2->comment_text;
				$data = $row2->comment_data;
			} elseif ( $this->stage < MIGRATION_NEW && array_key_exists( "{$key}_old", $row ) ) {
				// No comment-table row yet; use the legacy column selected by getFields().
				$cid = null;
				$text = $row["{$key}_old"];
				$data = null;
			} else {
				// Only reachable after a failed selectRow(), so $id is always set here.
				// @codeCoverageIgnoreStart
				wfLogWarning( "Missing comment row for $key, id=$id" );
				$cid = null;
				$text = '';
				$data = null;
				// @codeCoverageIgnoreEnd
			}
		}

		$msg = null;
		if ( $data !== null ) {
			// Keep the raw string so that the warning below can report it; the
			// decoded value may be null or an array and is useless in a log line.
			$rawData = $data;
			$data = FormatJson::decode( $rawData );
			if ( !is_object( $data ) ) {
				// @codeCoverageIgnoreStart
				wfLogWarning( "Invalid JSON object in comment: $rawData" );
				$data = null;
				// @codeCoverageIgnoreEnd
			} else {
				$data = (array)$data;
				if ( isset( $data['_message'] ) ) {
					$msg = self::decodeMessage( $data['_message'] )
						->setInterfaceMessageFlag( true );
				}
				if ( !empty( $data['_null'] ) ) {
					// '_null' marks "no caller-supplied data" (see createComment()).
					$data = null;
				} else {
					// Keys beginning with '_' are reserved for internal use; hide them.
					foreach ( $data as $k => $v ) {
						if ( substr( $k, 0, 1 ) === '_' ) {
							unset( $data[$k] );
						}
					}
				}
			}
		}

		return new CommentStoreComment( $cid, $text, $msg, $data );
	}

	/**
	 * Extract the comment from a row
	 *
	 * Use `self::getJoin()` to ensure the row contains the needed data.
	 *
	 * If you need to fake a comment in a row for some reason, set fields
	 * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
	 *
	 * @param object|array $row Result row.
	 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
	 * @return CommentStoreComment
	 */
	public function getComment( $row, $fallback = false ) {
		return $this->getCommentInternal( null, $row, $fallback );
	}

	/**
	 * Extract the comment from a row, with legacy lookups.
	 *
	 * If `$row` might have been generated using `self::getFields()` rather
	 * than `self::getJoin()`, use this. Prefer `self::getComment()` if you
	 * know callers used `self::getJoin()` for the row fetch.
	 *
	 * If you need to fake a comment in a row for some reason, set fields
	 * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
	 *
	 * @param IDatabase $db Database handle to use for lookup
	 * @param object|array $row Result row.
	 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
	 * @return CommentStoreComment
	 */
	public function getCommentLegacy( IDatabase $db, $row, $fallback = false ) {
		return $this->getCommentInternal( $db, $row, $fallback );
	}

	/**
	 * Create a new CommentStoreComment, inserting it into the database if necessary
	 *
	 * If a comment is going to be passed to `self::insert()` or the like
	 * multiple times, it will be more efficient to pass a CommentStoreComment
	 * once rather than making `self::insert()` do it every time through.
	 *
	 * @note When passing a CommentStoreComment, this may set `$comment->id` if
	 *  it's not already set. If `$comment->id` is already set, it will not be
	 *  verified that the specified comment actually exists or that it
	 *  corresponds to the comment text, message, and/or data in the
	 *  CommentStoreComment.
	 * @param IDatabase $dbw Database handle to insert on. Unused if `$comment`
	 *  is a CommentStoreComment and `$comment->id` is set.
	 * @param string|Message|CommentStoreComment $comment Comment text or Message object, or
	 *  a CommentStoreComment.
	 * @param array|null $data Structured data to store. Keys beginning with '_' are reserved.
	 *  Ignored if $comment is a CommentStoreComment.
	 * @return CommentStoreComment
	 * @throws OverflowException If the serialized data exceeds self::MAX_DATA_LENGTH bytes.
	 */
	public function createComment( IDatabase $dbw, $comment, array $data = null ) {
		$comment = CommentStoreComment::newUnsavedComment( $comment, $data );

		# Truncate comment in a Unicode-sensitive manner
		$comment->text = $this->lang->truncate( $comment->text, self::MAX_COMMENT_LENGTH );
		if ( mb_strlen( $comment->text, 'UTF-8' ) > self::COMMENT_CHARACTER_LIMIT ) {
			$ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this->lang )->escaped();
			if ( mb_strlen( $ellipsis, 'UTF-8' ) >= self::COMMENT_CHARACTER_LIMIT ) {
				// A localized ellipsis this long would leave no room for any
				// comment text; use a plain one instead.
				$ellipsis = '...';
			}
			$maxLength = self::COMMENT_CHARACTER_LIMIT - mb_strlen( $ellipsis, 'UTF-8' );
			$comment->text = mb_substr( $comment->text, 0, $maxLength, 'UTF-8' ) . $ellipsis;
		}

		if ( $this->stage > MIGRATION_OLD && !$comment->id ) {
			$dbData = $comment->data;
			if ( !$comment->message instanceof RawMessage ) {
				if ( $dbData === null ) {
					// Remember that the caller supplied no data, so reading
					// the comment back yields null rather than an empty array.
					$dbData = [ '_null' => true ];
				}
				$dbData['_message'] = self::encodeMessage( $comment->message );
			}
			if ( $dbData !== null ) {
				$dbData = FormatJson::encode( (object)$dbData, false, FormatJson::ALL_OK );
				// Byte length on purpose: the limit is the size of the DB field.
				$len = strlen( $dbData );
				if ( $len > self::MAX_DATA_LENGTH ) {
					$max = self::MAX_DATA_LENGTH;
					throw new OverflowException( "Comment data is too long ($len bytes, maximum is $max)" );
				}
			}

			// Reuse an existing identical comment row if there is one. The hash
			// is included in the lookup conditions together with the full text
			// and data, which are what actually establish equality.
			$hash = self::hash( $comment->text, $dbData );
			$comment->id = $dbw->selectField(
				'comment',
				'comment_id',
				[
					'comment_hash' => $hash,
					'comment_text' => $comment->text,
					'comment_data' => $dbData,
				],
				__METHOD__
			);
			if ( !$comment->id ) {
				$dbw->insert(
					'comment',
					[
						'comment_hash' => $hash,
						'comment_text' => $comment->text,
						'comment_data' => $dbData,
					],
					__METHOD__
				);
				$comment->id = $dbw->insertId();
			}
		}

		return $comment;
	}

	/**
	 * Implementation for `self::insert()` and `self::insertWithTempTable()`
	 * @param IDatabase $dbw
	 * @param string|Message|CommentStoreComment $comment
	 * @param array|null $data
	 * @return array [ array $fields, callable|null $callback ] The callback is
	 *  null unless the key uses a temporary table and the new schema is being
	 *  written.
	 */
	private function insertInternal( IDatabase $dbw, $comment, $data ) {
		$fields = [];
		$callback = null;

		$comment = $this->createComment( $dbw, $comment, $data );

		if ( $this->stage <= MIGRATION_WRITE_BOTH ) {
			// The legacy column is still written; it is truncated to 255 here.
			$fields[$this->key] = $this->lang->truncate( $comment->text, 255 );
		}

		if ( $this->stage >= MIGRATION_WRITE_BOTH ) {
			if ( isset( self::$tempTables[$this->key] ) ) {
				// The link-table row needs the main row's primary key, which is
				// not known yet, so the insert is deferred to a callback.
				$t = self::$tempTables[$this->key];
				$func = __METHOD__;
				$commentId = $comment->id;
				$callback = function ( $id ) use ( $dbw, $commentId, $t, $func ) {
					$dbw->insert(
						$t['table'],
						[
							$t['pk'] => $id,
							$t['field'] => $commentId,
						],
						$func
					);
				};
			} else {
				$fields["{$this->key}_id"] = $comment->id;
			}
		}

		return [ $fields, $callback ];
	}

	/**
	 * Insert a comment in preparation for a row that references it
	 *
	 * @note It's recommended to include both the call to this method and the
	 *  row insert in the same transaction.
	 * @param IDatabase $dbw Database handle to insert on
	 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
	 * @param array|null $data As for `self::createComment()`
	 * @return array Fields for the insert or update
	 * @throws InvalidArgumentException If the key requires insertWithTempTable()
	 */
	public function insert( IDatabase $dbw, $comment, $data = null ) {
		if ( isset( self::$tempTables[$this->key] ) ) {
			throw new InvalidArgumentException( "Must use insertWithTempTable() for $this->key" );
		}

		list( $fields ) = $this->insertInternal( $dbw, $comment, $data );
		return $fields;
	}

	/**
	 * Insert a comment in a temporary table in preparation for a row that references it
	 *
	 * This is currently needed for "rev_comment" and "img_description". In the
	 * future that requirement will be removed.
	 *
	 * @note It's recommended to include both the call to this method and the
	 *  row insert in the same transaction.
	 * @param IDatabase $dbw Database handle to insert on
	 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
	 * @param array|null $data As for `self::createComment()`
	 * @return array Two values:
	 *  - array Fields for the insert or update
	 *  - callable Function to call when the primary key of the row being
	 *    inserted/updated is known. Pass it that primary key.
	 * @throws InvalidArgumentException If the key never used a temporary table
	 */
	public function insertWithTempTable( IDatabase $dbw, $comment, $data = null ) {
		if ( isset( self::$formerTempTables[$this->key] ) ) {
			wfDeprecated( __METHOD__ . " for $this->key", self::$formerTempTables[$this->key] );
		} elseif ( !isset( self::$tempTables[$this->key] ) ) {
			throw new InvalidArgumentException( "Must use insert() for $this->key" );
		}

		list( $fields, $callback ) = $this->insertInternal( $dbw, $comment, $data );
		if ( !$callback ) {
			// Always hand back something callable so callers need not check.
			$callback = function () {
				// Do nothing.
			};
		}
		return [ $fields, $callback ];
	}

	/**
	 * Encode a Message as a PHP data structure
	 *
	 * The result is a list whose first element is the message key (or the list
	 * of keys to try, when there is more than one) followed by the message
	 * parameters. Parameters that are themselves Message objects are encoded
	 * recursively and wrapped as `[ 'message' => ... ]`.
	 *
	 * @param Message $msg
	 * @return array
	 */
	protected static function encodeMessage( Message $msg ) {
		$key = count( $msg->getKeysToTry() ) > 1 ? $msg->getKeysToTry() : $msg->getKey();
		$params = $msg->getParams();
		foreach ( $params as &$param ) {
			if ( $param instanceof Message ) {
				$param = [
					'message' => self::encodeMessage( $param )
				];
			}
		}
		// Break the reference so the last element is not left aliased.
		unset( $param );
		array_unshift( $params, $key );
		return $params;
	}

	/**
	 * Decode a message that was encoded by self::encodeMessage()
	 * @param array $data
	 * @return Message
	 */
	protected static function decodeMessage( $data ) {
		$key = array_shift( $data );
		foreach ( $data as &$param ) {
			if ( is_object( $param ) ) {
				// Nested structures may arrive as objects after JSON decoding.
				$param = (array)$param;
			}
			if ( is_array( $param ) && count( $param ) === 1 && isset( $param['message'] ) ) {
				$param = self::decodeMessage( $param['message'] );
			}
		}
		// Break the reference so the last element is not left aliased.
		unset( $param );
		return new Message( $key, $data );
	}

	/**
	 * Hashing function for comment storage
	 *
	 * The hash is the XOR of the CRC-32 checksums of the text and of the data
	 * (a null $data is hashed as the empty string).
	 *
	 * @param string $text Comment text
	 * @param string|null $data Comment data
	 * @return int 32-bit signed integer
	 */
	public static function hash( $text, $data ) {
		$hash = crc32( $text ) ^ crc32( (string)$data );

		// 64-bit PHP returns an unsigned CRC, change it to signed for
		// insertion into the database.
		if ( $hash >= 0x80000000 ) {
			$hash |= -1 << 32;
		}

		return $hash;
	}

}