Merge "Fix option name in maintenance/importDump.php"
[lhc/web/wiklou.git] / includes / Storage / SqlBlobStore.php
1 <?php
2 /**
3 * Service for storing and loading data blobs representing revision content.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * Attribution notice: when this file was created, much of its content was taken
21 * from the Revision.php file as present in release 1.30. Refer to the history
22 * of that file for original authorship.
23 *
24 * @file
25 */
26
27 namespace MediaWiki\Storage;
28
29 use DBAccessObjectUtils;
30 use ExternalStore;
31 use IDBAccessObject;
32 use IExpiringStore;
33 use InvalidArgumentException;
34 use Language;
35 use MWException;
36 use WANObjectCache;
37 use Wikimedia\Assert\Assert;
38 use Wikimedia\Rdbms\Database;
39 use Wikimedia\Rdbms\IDatabase;
40 use Wikimedia\Rdbms\LoadBalancer;
41
42 /**
43 * Service for storing and loading data blobs representing revision content.
44 *
45 * @since 1.31
46 *
47 * @note This was written to act as a drop-in replacement for the corresponding
48 * static methods in Revision.
49 */
50 class SqlBlobStore implements IDBAccessObject, BlobStore {
51
52 // Note: the name has been taken unchanged from the Revision class.
53 const TEXT_CACHE_GROUP = 'revisiontext:10';
54
55 /**
56 * @var LoadBalancer
57 */
58 private $dbLoadBalancer;
59
60 /**
61 * @var WANObjectCache
62 */
63 private $cache;
64
65 /**
66 * @var bool|string Wiki ID
67 */
68 private $wikiId;
69
70 /**
71 * @var int
72 */
73 private $cacheExpiry = 604800; // 7 days
74
75 /**
76 * @var bool
77 */
78 private $compressBlobs = false;
79
80 /**
81 * @var bool|string
82 */
83 private $legacyEncoding = false;
84
85 /**
86 * @var Language|null
87 */
88 private $legacyEncodingConversionLang = null;
89
90 /**
91 * @var boolean
92 */
93 private $useExternalStore = false;
94
95 /**
96 * @param LoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
97 * @param WANObjectCache $cache A cache manager for caching blobs. This can be the local
98 * wiki's default instance even if $wikiId refers to a different wiki, since
99 * makeGlobalKey() is used to construct a key that allows cached blobs from the
100 * same database to be re-used between wikis. For example, enwiki and frwiki will
101 * use the same cache keys for blobs from the wikidatawiki database, regardless of
102 * the cache's default key space.
103 * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki.
104 */
public function __construct(
	LoadBalancer $dbLoadBalancer,
	WANObjectCache $cache,
	$wikiId = false
) {
	// Remember which wiki is targeted, then the services used to reach and
	// cache its blobs. The remaining settings keep their property defaults
	// until the corresponding setters are called.
	$this->wikiId = $wikiId;
	$this->cache = $cache;
	$this->dbLoadBalancer = $dbLoadBalancer;
}
114
115 /**
116 * @return int time for which blobs can be cached, in seconds
117 */
public function getCacheExpiry() {
	// Configured lifetime of cached blobs, in seconds.
	$expirySeconds = $this->cacheExpiry;

	return $expirySeconds;
}
121
122 /**
123 * @param int $cacheExpiry time for which blobs can be cached, in seconds
124 */
public function setCacheExpiry( $cacheExpiry ) {
	// Type-check the value before it replaces the current expiry.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
130
131 /**
132 * @return bool whether blobs should be compressed for storage
133 */
public function getCompressBlobs() {
	// Current compression setting, as consulted by compressData().
	$compress = $this->compressBlobs;

	return $compress;
}
137
138 /**
139 * @param bool $compressBlobs whether blobs should be compressed for storage
140 */
public function setCompressBlobs( $compressBlobs ) {
	// Stored exactly as given; unlike the other setters, no type check is
	// applied to this value.
	$this->compressBlobs = $compressBlobs;
}
144
145 /**
146 * @return false|string The legacy encoding to assume for blobs that are not marked as utf8.
147 * False means handling of legacy encoding is disabled, and utf8 assumed.
148 */
public function getLegacyEncoding() {
	// False until setLegacyEncoding() has been called.
	$encoding = $this->legacyEncoding;

	return $encoding;
}
152
153 /**
154 * @return Language|null The locale to use when decoding from a legacy encoding, or null
155 * if handling of legacy encoding is disabled.
156 */
public function getLegacyEncodingConversionLang() {
	// Null until setLegacyEncoding() has been called.
	$language = $this->legacyEncodingConversionLang;

	return $language;
}
160
161 /**
162 * @param string $legacyEncoding The legacy encoding to assume for blobs that are
163 * not marked as utf8.
164 * @param Language $language The locale to use when decoding from a legacy encoding.
165 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	// Both values are needed together: decompressData() only converts blobs
	// when an encoding and a conversion language are set.
	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
172
173 /**
174 * @return bool Whether to use the ExternalStore mechanism for storing blobs.
175 */
public function getUseExternalStore() {
	// Current setting, as consulted by storeBlob() and isReadOnly().
	$useExternal = $this->useExternalStore;

	return $useExternal;
}
179
180 /**
181 * @param bool $useExternalStore Whether to use the ExternalStore mechanism for storing blobs.
182 */
public function setUseExternalStore( $useExternalStore ) {
	// Type-check the flag before it replaces the current setting.
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );
	$this->useExternalStore = $useExternalStore;
}
188
189 /**
190 * @return LoadBalancer
191 */
private function getDBLoadBalancer() {
	// The load balancer handed to the constructor.
	$loadBalancer = $this->dbLoadBalancer;

	return $loadBalancer;
}
195
196 /**
197 * @param int $index A database index, like DB_MASTER or DB_REPLICA
198 *
199 * @return IDatabase
200 */
private function getDBConnection( $index ) {
	// Ask the load balancer for a connection to the configured wiki.
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
205
206 /**
207 * Stores an arbitrary blob of data and returns an address that can be used with
208 * getBlob() to retrieve the same blob of data.
209 *
210 * @param string $data
211 * @param array $hints An array of hints.
212 *
213 * @throws BlobAccessException
214 * @return string an address that can be used with getBlob() to retrieve the data.
215 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() may rewrite $data in place; it returns the flags
		// describing how the payload is now encoded.
		$flagList = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// From here on $data holds whatever insertToDefault() returned,
			// and that value is what ends up in the text row.
			$data = ExternalStore::insertToDefault( $data );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}

			// Append the 'external' marker, comma-separated if flags are already present.
			$flagList .= ( $flagList ? ',' : '' ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$textRow = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagList,
		];
		$dbw->insert( 'text', $textRow, __METHOD__ );

		// The ID reported for the insert becomes the "tt:" address of the blob.
		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		// Re-throw as the exception type this method documents, keeping the cause.
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
257
258 /**
259 * Retrieve a blob, given an address.
260 * Currently hardcoded to the 'text' table storage engine.
261 *
262 * MCR migration note: this replaces Revision::loadText
263 *
264 * @param string $blobAddress
265 * @param int $queryFlags
266 *
267 * @throws BlobAccessException
268 * @return string
269 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	$cacheKey = $this->getCacheKey( $blobAddress );
	$cacheTtl = $this->getCacheTTL();

	// Invoked by the cache on a miss: loads the blob from the database and
	// merges the cache set options derived from the connection being read.
	$loadBlob = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		list( $index ) = DBAccessObjectUtils::getDBOptions( $queryFlags );
		$setOpts += Database::getCacheSetOptions( $this->getDBConnection( $index ) );

		return $this->fetchBlob( $blobAddress, $queryFlags );
	};

	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG,
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback( $cacheKey, $cacheTtl, $loadBlob, $cacheOptions );

	if ( $blob === false ) {
		throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
	}

	return $blob;
}
292
293 /**
294 * MCR migration note: this corresponds to Revision::fetchText
295 *
296 * @param string $blobAddress
297 * @param int $queryFlags
298 *
299 * @throws BlobAccessException
300 * @return string|false
301 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema !== 'tt' ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	// The ID must be a non-zero integer in canonical decimal form
	// (so no leading zeros or other decoration).
	$textId = intval( $id );
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	if ( DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST ) ) {
		$queryFlags |= self::READ_LATEST_IMMUTABLE;
	}

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	// Captured outside the closure so that queries are still attributed to this
	// method; __METHOD__ evaluated inside a closure would name the closure instead.
	$fname = __METHOD__;
	$selectTextRow = function ( $dbIndex, $selectOptions ) use ( $textId, $fname ) {
		return $this->getDBConnection( $dbIndex )->selectRow(
			'text',
			[ 'old_text', 'old_flags' ],
			[ 'old_id' => $textId ],
			$fname,
			$selectOptions
		);
	};

	// Text data is immutable; check replica DBs first.
	$row = $selectTextRow( $index, $options );

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $selectTextRow( $fallbackIndex, $fallbackOptions );
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	// The blob address doubles as the cache key hint for expandBlob().
	$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
	if ( $blob === false ) {
		wfLogWarning( __METHOD__ . ": Bad data in text row $textId." );
		return false;
	}

	return $blob;
}
362
363 /**
364 * Get a cache key for a given Blob address.
365 *
366 * The cache key is constructed in a way that allows cached blobs from the same database
367 * to be re-used between wikis. For example, enwiki and frwiki will use the same cache keys
368 * for blobs from the wikidatawiki database.
369 *
370 * @param string $blobAddress
371 * @return string
372 */
private function getCacheKey( $blobAddress ) {
	// Key on the resolved database domain rather than the raw wiki ID.
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->wikiId );

	return $this->cache->makeGlobalKey( 'BlobStore', 'address', $domainId, $blobAddress );
}
381
382 /**
383 * Expand a raw data blob according to the flags given.
384 *
385 * MCR migration note: this replaces Revision::getRevisionText
386 *
387 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
388 * @todo make this private, there should be no need to use this method outside this class.
389 *
390 * @param string $raw The raw blob data, to be processed according to $flags.
391 * May be the blob itself, or the blob compressed, or just the address
392 * of the actual blob, depending on $flags.
393 * @param string|string[] $flags Blob flags, such as 'external' or 'gzip'.
394 * Note that not including 'utf-8' in $flags will cause the data to be decoded
395 * according to the legacy encoding specified via setLegacyEncoding.
396 * @param string|null $cacheKey A blob address for use in the cache key. If not given,
397 * caching is disabled.
398 *
399 * @return false|string The expanded blob or false on failure
400 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	// Accept the comma-separated form as stored in the old_flags column.
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	// Inline blobs only need decoding according to their flags.
	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$parts = explode( '://', $url, 2 );
	if ( count( $parts ) < 2 || $parts[1] === '' ) {
		// No "scheme://path" shape, or nothing after the separator.
		return false;
	}

	// Shared by the cached and the uncached path: fetch the external blob
	// and decode it, passing a fetch failure through as false.
	$fetchExternal = function () use ( $url, $flags ) {
		// No negative caching per BlobStore::getBlob()
		$blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );
		if ( $blob === false ) {
			return false;
		}

		return $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey ) {
		// Caching is disabled when no cache key was supplied.
		return $fetchExternal();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$fetchExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
435
436 /**
437 * If $wgCompressRevisions is enabled, we will compress data.
438 * The input string is modified in place.
439 * Return value is the flags field: contains 'gzip' if the
440 * data is compressed, and 'utf-8' if we're saving in UTF-8
441 * mode.
442 *
443 * MCR migration note: this replaces Revision::compressRevisionText
444 *
445 * @note direct use is deprecated!
446 * @todo make this private, there should be no need to use this method outside this class.
447 *
448 * @param mixed &$blob Reference to a text
449 *
450 * @return string
451 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( !$this->compressBlobs ) {
		// Compression is switched off: $blob is left untouched.
		return implode( ',', $blobFlags );
	}

	if ( !function_exists( 'gzdeflate' ) ) {
		wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		return implode( ',', $blobFlags );
	}

	$deflated = gzdeflate( $blob );
	if ( $deflated === false ) {
		// Keep the uncompressed data and do not claim 'gzip' in the flags.
		wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
	} else {
		// Replace the caller's data with the compressed form and record that.
		$blob = $deflated;
		$blobFlags[] = 'gzip';
	}

	return implode( ',', $blobFlags );
}
476
477 /**
478 * Re-converts revision text according to its flags.
479 *
480 * MCR migration note: this replaces Revision::decompressRevisionText
481 *
482 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
483 * @todo make this private, there should be no need to use this method outside this class.
484 *
485 * @param string $blob Blob in compressed/encoded form.
486 * @param array $blobFlags Compression flags, such as 'gzip'.
487 * Note that not including 'utf-8' in $blobFlags will cause the data to be decoded
488 * according to the legacy encoding specified via setLegacyEncoding.
489 *
490 * @return string|bool Decompressed text, or false on failure
491 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	if ( in_array( 'error', $blobFlags ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() instantiates whatever class the stored data names.
		// This is only safe as long as the blob storage cannot be written by untrusted parties.
		$obj = unserialize( $blob );

		// Besides non-objects, also reject objects that cannot provide the text: e.g. when
		// the serialized class is not available, unserialize() yields a placeholder object
		// (which is_object() accepts on PHP >= 7.2), and calling getText() on it would be a
		// fatal error rather than the documented "false on failure".
		if ( !is_object( $obj ) || !is_callable( [ $obj, 'getText' ] ) ) {
			// Invalid object
			return false;
		}

		// getText() is not guaranteed to return a string here, hence the
		// explicit false check below.
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
		&& !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
536
537 /**
538 * Get the text cache TTL
539 *
540 * MCR migration note: this replaces Revision::getCacheTTL
541 *
542 * @return int
543 */
private function getCacheTTL() {
	$emulationLevel = $this->cache->getQoS( WANObjectCache::ATTR_EMULATION );

	if ( $emulationLevel <= WANObjectCache::QOS_EMULATION_SQL ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return WANObjectCache::TTL_UNCACHEABLE;
	}

	// A falsy expiry (such as 0) also means "do not cache".
	return $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
}
556
557 /**
558 * Returns an ID corresponding to the old_id field in the text table, corresponding
559 * to the given $address.
560 *
561 * Currently, $address must start with 'tt:' followed by a decimal integer representing
562 * the old_id; if $address does not start with 'tt:', null is returned. However,
563 * the implementation may change to insert rows into the text table on the fly.
564 * This implies that this method cannot be static.
565 *
566 * @note This method exists for use with the text table based storage schema.
567 * It should not be assumed that it will function with all future kinds of content addresses.
568 *
569 * @deprecated since 1.31, so don't assume that all blob addresses refer to a row in the text
570 * table. This method should become private once the relevant refactoring in WikiPage is
571 * complete.
572 *
573 * @param string $address
574 *
575 * @return int|null
576 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	// Only text-table addresses map to an old_id.
	if ( $schema !== 'tt' ) {
		return null;
	}

	// Accept only a non-zero integer written in canonical decimal form.
	$textId = intval( $id );
	$isCanonicalId = $textId && $id === (string)$textId;

	if ( !$isCanonicalId ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
592
593 /**
594 * Returns an address referring to content stored in the text table row with the given ID.
595 * The address schema for blobs stored in the text table is "tt:" followed by an integer
596 * that corresponds to a value of the old_id field.
597 *
598 * @deprecated since 1.31. This method should become private once the relevant refactoring
599 * in WikiPage is complete.
600 *
601 * @param int $id
602 *
603 * @return string
604 */
public static function makeAddressFromTextId( $id ) {
	// "tt" is the address schema for rows of the text table.
	return "tt:$id";
}
608
609 /**
610 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
611 *
612 * @param string $address
613 *
614 * @throws InvalidArgumentException
615 * @return array [ $schema, $id, $parameters ], with $parameters being an assoc array.
616 */
private static function splitBlobAddress( $address ) {
	// Anchor the end with \z rather than $: by default $ also matches just before a
	// trailing newline, which would let "tt:123\n" pass as an alias of "tt:123" and
	// end up under a separate cache key.
	if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?\z/', $address, $m ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	// The schema is case-insensitive; the ID is passed through untouched.
	$schema = strtolower( $m[1] );
	$id = $m[2];

	// Group 4 is only present when the address carries a "?..." suffix.
	$parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [];

	return [ $schema, $id, $parameters ];
}
628
public function isReadOnly() {
	// With external storage in use, a read-only default external store makes
	// the whole blob store read-only. The external store is only consulted
	// when it is actually enabled.
	$externalIsReadOnly = $this->useExternalStore
		&& ExternalStore::defaultStoresAreReadOnly();
	if ( $externalIsReadOnly ) {
		return true;
	}

	// Otherwise the store is read-only exactly when the load balancer
	// reports a read-only reason.
	$reason = $this->getDBLoadBalancer()->getReadOnlyReason();

	return $reason !== false;
}
636 }