Merge "GlobalFunctions: Document the usage of wfUrlencode( null )"
[lhc/web/wiklou.git] / includes / Storage / SqlBlobStore.php
1 <?php
2 /**
3 * Service for storing and loading data blobs representing revision content.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * Attribution notice: when this file was created, much of its content was taken
21 * from the Revision.php file as present in release 1.30. Refer to the history
22 * of that file for original authorship.
23 *
24 * @file
25 */
26
27 namespace MediaWiki\Storage;
28
29 use DBAccessObjectUtils;
30 use ExternalStore;
31 use IDBAccessObject;
32 use IExpiringStore;
33 use InvalidArgumentException;
34 use Language;
35 use MWException;
36 use WANObjectCache;
37 use Wikimedia\Assert\Assert;
38 use Wikimedia\Rdbms\IDatabase;
39 use Wikimedia\Rdbms\LoadBalancer;
40
/**
 * Service for storing and loading data blobs (such as serialized revision content).
 *
 * Blobs are written to the "text" table; when the external store is enabled, the
 * payload is handed to ExternalStore and only its URL is kept in the "text" table.
 *
 * @since 1.31
 *
 * @note This was written to act as a drop-in replacement for the corresponding
 *       static methods in Revision.
 */
class SqlBlobStore implements IDBAccessObject, BlobStore {

	// Note: the name has been taken unchanged from the Revision class.
	const TEXT_CACHE_GROUP = 'revisiontext:10';

	/**
	 * @var LoadBalancer Source of database connections
	 */
	private $dbLoadBalancer;

	/**
	 * @var WANObjectCache Cache used for expanded blobs
	 */
	private $cache;

	/**
	 * @var bool|string Wiki ID, or false for the local wiki
	 */
	private $wikiId;

	/**
	 * @var int Time for which blobs can be cached, in seconds
	 */
	private $cacheExpiry = 604800; // 7 days

	/**
	 * @var bool Whether blobs should be compressed for storage
	 */
	private $compressBlobs = false;

	/**
	 * @var bool|string Legacy encoding name, or false if legacy handling is disabled
	 */
	private $legacyEncoding = false;

	/**
	 * @var Language|null Language object used to convert from the legacy encoding
	 */
	private $legacyEncodingConversionLang = null;

	/**
	 * @var bool Whether new blobs are written through ExternalStore
	 */
	private $useExternalStore = false;

	/**
	 * @param LoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
	 * @param WANObjectCache $cache A cache manager for caching blobs. This can be the local
	 *        wiki's default instance even if $wikiId refers to a different wiki, since
	 *        makeGlobalKey() is used to construct a key that allows cached blobs from the
	 *        same database to be re-used between wikis. For example, enwiki and frwiki will
	 *        use the same cache keys for blobs from the wikidatawiki database, regardless of
	 *        the cache's default key space.
	 * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki.
	 */
	public function __construct(
		LoadBalancer $dbLoadBalancer,
		WANObjectCache $cache,
		$wikiId = false
	) {
		$this->dbLoadBalancer = $dbLoadBalancer;
		$this->cache = $cache;
		$this->wikiId = $wikiId;
	}

	/**
	 * @return int time for which blobs can be cached, in seconds
	 */
	public function getCacheExpiry() {
		return $this->cacheExpiry;
	}

	/**
	 * @param int $cacheExpiry time for which blobs can be cached, in seconds
	 */
	public function setCacheExpiry( $cacheExpiry ) {
		Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );

		$this->cacheExpiry = $cacheExpiry;
	}

	/**
	 * @return bool whether blobs should be compressed for storage
	 */
	public function getCompressBlobs() {
		return $this->compressBlobs;
	}

	/**
	 * @param bool $compressBlobs whether blobs should be compressed for storage
	 */
	public function setCompressBlobs( $compressBlobs ) {
		$this->compressBlobs = $compressBlobs;
	}

	/**
	 * @return false|string The legacy encoding to assume for blobs that are not marked as utf8.
	 *         False means handling of legacy encoding is disabled, and utf8 assumed.
	 */
	public function getLegacyEncoding() {
		return $this->legacyEncoding;
	}

	/**
	 * @return Language|null The locale to use when decoding from a legacy encoding, or null
	 *         if handling of legacy encoding is disabled.
	 */
	public function getLegacyEncodingConversionLang() {
		return $this->legacyEncodingConversionLang;
	}

	/**
	 * @param string $legacyEncoding The legacy encoding to assume for blobs that are
	 *        not marked as utf8.
	 * @param Language $language The locale to use when decoding from a legacy encoding.
	 */
	public function setLegacyEncoding( $legacyEncoding, Language $language ) {
		Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

		$this->legacyEncoding = $legacyEncoding;
		$this->legacyEncodingConversionLang = $language;
	}

	/**
	 * @return bool Whether to use the ExternalStore mechanism for storing blobs.
	 */
	public function getUseExternalStore() {
		return $this->useExternalStore;
	}

	/**
	 * @param bool $useExternalStore Whether to use the ExternalStore mechanism for storing blobs.
	 */
	public function setUseExternalStore( $useExternalStore ) {
		Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

		$this->useExternalStore = $useExternalStore;
	}

	/**
	 * @return LoadBalancer
	 */
	private function getDBLoadBalancer() {
		return $this->dbLoadBalancer;
	}

	/**
	 * @param int $index A database index, like DB_MASTER or DB_REPLICA
	 *
	 * @return IDatabase
	 */
	private function getDBConnection( $index ) {
		$lb = $this->getDBLoadBalancer();
		return $lb->getConnection( $index, [], $this->wikiId );
	}

	/**
	 * Stores an arbitrary blob of data and returns an address that can be used with
	 * getBlob() to retrieve the same blob of data.
	 *
	 * @param string $data
	 * @param array $hints An array of hints. Currently not used by this implementation.
	 *
	 * @throws BlobAccessException
	 * @return string an address that can be used with getBlob() to retrieve the data.
	 */
	public function storeBlob( $data, $hints = [] ) {
		try {
			// compressData() may replace $data with its compressed form (by reference).
			$flags = $this->compressData( $data );

			# Write to external storage if required
			if ( $this->useExternalStore ) {
				// Store and get the URL
				$data = ExternalStore::insertToDefault( $data );
				if ( !$data ) {
					throw new BlobAccessException( "Failed to store text to external storage" );
				}
				if ( $flags ) {
					$flags .= ',';
				}
				$flags .= 'external';

				// TODO: we could also return an address for the external store directly here.
				// That would mean bypassing the text table entirely when the external store is
				// used. We'll need to assess expected fallout before doing that.
			}

			$dbw = $this->getDBConnection( DB_MASTER );

			$old_id = $dbw->nextSequenceValue( 'text_old_id_seq' );
			$dbw->insert(
				'text',
				[
					'old_id' => $old_id,
					'old_text' => $data,
					'old_flags' => $flags,
				],
				__METHOD__
			);

			$textId = $dbw->insertId();

			return self::makeAddressFromTextId( $textId );
		} catch ( MWException $e ) {
			throw new BlobAccessException( $e->getMessage(), 0, $e );
		}
	}

	/**
	 * Retrieve a blob, given an address.
	 * Currently hardcoded to the 'text' table storage engine.
	 *
	 * MCR migration note: this replaces Revision::loadText
	 *
	 * @param string $blobAddress
	 * @param int $queryFlags
	 *
	 * @throws BlobAccessException if the address is malformed or the blob cannot be loaded
	 * @return string
	 */
	public function getBlob( $blobAddress, $queryFlags = 0 ) {
		Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$blob = $this->cache->getWithSetCallback(
			$this->getCacheKey( $blobAddress ),
			$this->getCacheTTL(),
			function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
				// Ignore $setOpts; blobs are immutable and negatives are not cached
				return $this->fetchBlob( $blobAddress, $queryFlags );
			},
			[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ]
		);

		if ( $blob === false ) {
			throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
		}

		return $blob;
	}

	/**
	 * Loads the blob for the given address from the text table, bypassing the cache.
	 *
	 * MCR migration note: this corresponds to Revision::fetchText
	 *
	 * @param string $blobAddress
	 * @param int $queryFlags
	 *
	 * @throws BlobAccessException if the address is malformed or uses an unknown schema
	 * @return string|false The blob, or false if the row is missing or contains bad data
	 */
	private function fetchBlob( $blobAddress, $queryFlags ) {
		try {
			list( $schema, $id ) = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			// Report malformed addresses the same way as the other bad-address cases
			// below, so that callers only need to handle BlobAccessException.
			throw new BlobAccessException( $ex->getMessage(), 0, $ex );
		}

		// TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema === 'tt' ) {
			$textId = intval( $id );
		} else {
			// XXX: change to better exceptions! That makes migration more difficult, though.
			throw new BlobAccessException( "Unknown blob address schema: $schema" );
		}

		// The ID must be a non-zero integer in canonical decimal form.
		if ( !$textId || $id !== (string)$textId ) {
			// XXX: change to better exceptions! That makes migration more difficult, though.
			throw new BlobAccessException( "Bad blob address: $blobAddress" );
		}

		// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
		// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
		$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
			? self::READ_LATEST_IMMUTABLE
			: 0;

		list( $index, $options, $fallbackIndex, $fallbackOptions ) =
			DBAccessObjectUtils::getDBOptions( $queryFlags );

		// Text data is immutable; check replica DBs first.
		$row = $this->getDBConnection( $index )->selectRow(
			'text',
			[ 'old_text', 'old_flags' ],
			[ 'old_id' => $textId ],
			__METHOD__,
			$options
		);

		// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
		// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
		if ( !$row && $fallbackIndex !== null ) {
			$row = $this->getDBConnection( $fallbackIndex )->selectRow(
				'text',
				[ 'old_text', 'old_flags' ],
				[ 'old_id' => $textId ],
				__METHOD__,
				$fallbackOptions
			);
		}

		if ( !$row ) {
			wfWarn( __METHOD__ . ": No text row with ID $textId." );
			return false;
		}

		$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );

		if ( $blob === false ) {
			wfLogWarning( __METHOD__ . ": Bad data in text row $textId." );
			return false;
		}

		return $blob;
	}

	/**
	 * Get a cache key for a given Blob address.
	 *
	 * The cache key is constructed in a way that allows cached blobs from the same database
	 * to be re-used between wikis. For example, enwiki and frwiki will use the same cache keys
	 * for blobs from the wikidatawiki database.
	 *
	 * @param string $blobAddress
	 * @return string
	 */
	private function getCacheKey( $blobAddress ) {
		return $this->cache->makeGlobalKey(
			'BlobStore',
			'address',
			$this->getDBLoadBalancer()->resolveDomainID( $this->wikiId ),
			$blobAddress
		);
	}

	/**
	 * Expand a raw data blob according to the flags given.
	 *
	 * MCR migration note: this replaces Revision::getRevisionText
	 *
	 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
	 * @todo make this private, there should be no need to use this method outside this class.
	 *
	 * @param string $raw The raw blob data, to be processed according to $flags.
	 *        May be the blob itself, or the blob compressed, or just the address
	 *        of the actual blob, depending on $flags.
	 * @param string|string[] $flags Blob flags, such as 'external' or 'gzip'.
	 *        Note that not including 'utf-8' in $flags will cause the data to be decoded
	 *        according to the legacy encoding specified via setLegacyEncoding.
	 * @param string|null $cacheKey A blob address for use in the cache key. If not given,
	 *        caching is disabled.
	 *
	 * @return false|string The expanded blob or false on failure
	 */
	public function expandBlob( $raw, $flags, $cacheKey = null ) {
		if ( is_string( $flags ) ) {
			$flags = explode( ',', $flags );
		}

		// Blobs stored inline only need to be decompressed/decoded.
		if ( !in_array( 'external', $flags, true ) ) {
			return $this->decompressData( $raw, $flags );
		}

		// Use external methods for external objects, text in table is URL-only then
		$url = $raw;
		$parts = explode( '://', $url, 2 );
		if ( count( $parts ) === 1 || $parts[1] === '' ) {
			// Not of the form "<protocol>://<something>"
			return false;
		}

		// Fetches the payload from the external store and expands it. Shared by the
		// cached and the uncached code path.
		$fetchExternal = function () use ( $url, $flags ) {
			$blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );

			return $blob === false ? false : $this->decompressData( $blob, $flags );
		};

		if ( !$cacheKey ) {
			return $fetchExternal();
		}

		// The cached value should be decompressed, so handle that and return here.
		// Blobs are immutable and negatives are not cached.
		return $this->cache->getWithSetCallback(
			$this->getCacheKey( $cacheKey ),
			$this->getCacheTTL(),
			$fetchExternal,
			[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
		);
	}

	/**
	 * If $wgCompressRevisions is enabled, we will compress data.
	 * The input string is modified in place.
	 * Return value is the flags field: contains 'gzip' if the
	 * data is compressed, and 'utf-8' if we're saving in UTF-8
	 * mode.
	 *
	 * MCR migration note: this replaces Revision::compressRevisionText
	 *
	 * @note direct use is deprecated!
	 * @todo make this private, there should be no need to use this method outside this class.
	 *
	 * @param mixed &$blob Reference to a text
	 *
	 * @return string Comma separated list of flags
	 */
	public function compressData( &$blob ) {
		$blobFlags = [];

		// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
		// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
		// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
		$blobFlags[] = 'utf-8';

		if ( $this->compressBlobs ) {
			if ( function_exists( 'gzdeflate' ) ) {
				$deflated = gzdeflate( $blob );

				if ( $deflated === false ) {
					// Keep the uncompressed data and do not set the 'gzip' flag.
					wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
				} else {
					$blob = $deflated;
					$blobFlags[] = 'gzip';
				}
			} else {
				wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
			}
		}
		return implode( ',', $blobFlags );
	}

	/**
	 * Re-converts revision text according to its flags.
	 *
	 * MCR migration note: this replaces Revision::decompressRevisionText
	 *
	 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
	 * @todo make this private, there should be no need to use this method outside this class.
	 *
	 * @param string $blob Blob in compressed/encoded form.
	 * @param array $blobFlags Compression flags, such as 'gzip'.
	 *        Note that not including 'utf-8' in $blobFlags will cause the data to be decoded
	 *        according to the legacy encoding specified via setLegacyEncoding.
	 *
	 * @return string|bool Decompressed text, or false on failure
	 */
	public function decompressData( $blob, array $blobFlags ) {
		// Revision::decompressRevisionText accepted false here, so defend against that
		Assert::parameterType( 'string', $blob, '$blob' );

		if ( in_array( 'error', $blobFlags, true ) ) {
			// Error row, return false
			return false;
		}

		if ( in_array( 'gzip', $blobFlags, true ) ) {
			# Deal with optional compression of archived pages.
			# This can be done periodically via maintenance/compressOld.php, and
			# as pages are saved if $wgCompressRevisions is set.
			if ( !function_exists( 'gzinflate' ) ) {
				// Mirror the zlib check in compressData() instead of failing fatally.
				wfLogWarning( __METHOD__ . ': no zlib support, cannot decompress' );
				return false;
			}

			$blob = gzinflate( $blob );

			if ( $blob === false ) {
				wfWarn( __METHOD__ . ': gzinflate() failed' );
				return false;
			}
		}

		if ( in_array( 'object', $blobFlags, true ) ) {
			# Generic compressed storage
			// NOTE(review): unserialize() can instantiate arbitrary classes; this assumes
			// the stored blob data is trusted. Confirm before exposing to other sources.
			$obj = unserialize( $blob );
			if ( !is_object( $obj ) || !method_exists( $obj, 'getText' ) ) {
				// Invalid object, or an object that cannot provide the text
				// (e.g. an incomplete class).
				return false;
			}
			$blob = $obj->getText();
		}

		// Needed to support old revisions left over from the 1.4 / 1.5 migration.
		if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
			&& !in_array( 'utf-8', $blobFlags, true ) && !in_array( 'utf8', $blobFlags, true )
		) {
			# Old revisions kept around in a legacy encoding?
			# Upconvert on demand.
			# ("utf8" checked for compatibility with some broken
			# conversion scripts 2008-12-30)
			$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
		}

		return $blob;
	}

	/**
	 * Get the text cache TTL
	 *
	 * MCR migration note: this replaces Revision::getCacheTTL
	 *
	 * @return int
	 */
	private function getCacheTTL() {
		if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION )
			<= WANObjectCache::QOS_EMULATION_SQL
		) {
			// Do not cache RDBMs blobs in...the RDBMs store
			$ttl = WANObjectCache::TTL_UNCACHEABLE;
		} else {
			// An expiry of 0 disables caching as well.
			$ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
		}

		return $ttl;
	}

	/**
	 * Returns an ID corresponding to the old_id field in the text table, corresponding
	 * to the given $address.
	 *
	 * Currently, $address must start with 'tt:' followed by a decimal integer representing
	 * the old_id; if $address does not start with 'tt:', null is returned. However,
	 * the implementation may change to insert rows into the text table on the fly.
	 * This implies that this method cannot be static.
	 *
	 * @note This method exists for use with the text table based storage schema.
	 * It should not be assumed that it will function with all future kinds of content addresses.
	 *
	 * @deprecated since 1.31, so don't assume that all blob addresses refer to a row in the text
	 * table. This method should become private once the relevant refactoring in WikiPage is
	 * complete.
	 *
	 * @param string $address
	 *
	 * @throws InvalidArgumentException if $address is malformed, or the ID of a 'tt:'
	 *         address is not a non-zero integer in canonical decimal form
	 * @return int|null
	 */
	public function getTextIdFromAddress( $address ) {
		list( $schema, $id ) = self::splitBlobAddress( $address );

		if ( $schema !== 'tt' ) {
			return null;
		}

		$textId = intval( $id );

		if ( !$textId || $id !== (string)$textId ) {
			throw new InvalidArgumentException( "Malformed text_id: $id" );
		}

		return $textId;
	}

	/**
	 * Returns an address referring to content stored in the text table row with the given ID.
	 * The address schema for blobs stored in the text table is "tt:" followed by an integer
	 * that corresponds to a value of the old_id field.
	 *
	 * @deprecated since 1.31. This method should become private once the relevant refactoring
	 * in WikiPage is complete.
	 *
	 * @param int $id
	 *
	 * @return string
	 */
	public static function makeAddressFromTextId( $id ) {
		return 'tt:' . $id;
	}

	/**
	 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
	 *
	 * @since 1.33
	 *
	 * @param string $address
	 *
	 * @throws InvalidArgumentException if $address is not of the form
	 *         "<schema>:<id>", optionally followed by "?<parameters>"
	 * @return array [ $schema, $id, $parameters ], with $parameters being an assoc array.
	 */
	public static function splitBlobAddress( $address ) {
		// The D modifier makes "$" match only at the very end of the string, so an
		// address with a trailing newline is rejected instead of silently accepted.
		if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/D', $address, $m ) ) {
			throw new InvalidArgumentException( "Bad blob address: $address" );
		}

		$schema = strtolower( $m[1] );
		$id = $m[2];
		$parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [];

		return [ $schema, $id, $parameters ];
	}

	/**
	 * Tells whether new blobs can currently not be stored.
	 *
	 * @return bool True if the external store is in use and its default stores are
	 *         read-only, or if the load balancer reports a read-only reason.
	 */
	public function isReadOnly() {
		if ( $this->useExternalStore && ExternalStore::defaultStoresAreReadOnly() ) {
			return true;
		}

		return ( $this->getDBLoadBalancer()->getReadOnlyReason() !== false );
	}
}