Merge "rdbms: add LoadBalancer::getLocalDomainId() method"
[lhc/web/wiklou.git] / includes / Storage / SqlBlobStore.php
1 <?php
2 /**
3 * Service for storing and loading data blobs representing revision content.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * Attribution notice: when this file was created, much of its content was taken
21 * from the Revision.php file as present in release 1.30. Refer to the history
22 * of that file for original authorship.
23 *
24 * @file
25 */
26
27 namespace MediaWiki\Storage;
28
29 use DBAccessObjectUtils;
30 use ExternalStore;
31 use IDBAccessObject;
32 use IExpiringStore;
33 use InvalidArgumentException;
34 use Language;
35 use MWException;
36 use WANObjectCache;
37 use Wikimedia\Assert\Assert;
38 use Wikimedia\Rdbms\Database;
39 use Wikimedia\Rdbms\IDatabase;
40 use Wikimedia\Rdbms\LoadBalancer;
41
42 /**
43 * Service for storing and loading data blobs representing revision content.
44 *
45 * @since 1.31
46 *
47 * @note This was written to act as a drop-in replacement for the corresponding
48 * static methods in Revision.
49 */
50 class SqlBlobStore implements IDBAccessObject, BlobStore {
51
// Passed as the 'pcGroup' option when blobs are cached.
// Note: the name has been taken unchanged from the Revision class.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var LoadBalancer Source of database connections
 */
private $dbLoadBalancer;

/**
 * @var WANObjectCache Cache used for loaded blobs
 */
private $cache;

/**
 * @var bool|string Wiki ID; false means the local wiki
 */
private $wikiId;

/**
 * @var int Time for which blobs can be cached, in seconds
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether blobs should be compressed for storage
 */
private $compressBlobs = false;

/**
 * @var bool|string Legacy encoding to assume for blobs not marked as utf8; false to disable
 */
private $legacyEncoding = false;

/**
 * @var Language|null Language object used for converting from the legacy encoding
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool Whether to use the ExternalStore mechanism for storing blobs
 */
private $useExternalStore = false;
94
/**
 * Sets up a blob store that talks to the database via the given load balancer
 * and caches loaded blobs in the given cache.
 *
 * @param LoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
 * @param WANObjectCache $cache A cache manager for caching blobs
 * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki.
 */
public function __construct(
	LoadBalancer $dbLoadBalancer,
	WANObjectCache $cache,
	$wikiId = false
) {
	$this->wikiId = $wikiId;
	$this->cache = $cache;
	$this->dbLoadBalancer = $dbLoadBalancer;
}
109
/**
 * Returns the configured cache lifetime for blobs.
 *
 * @return int time for which blobs can be cached, in seconds
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
116
/**
 * Sets the cache lifetime for blobs. The value must be an integer.
 *
 * @param int $cacheExpiry time for which blobs can be cached, in seconds
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Reject anything that is not an integer before storing it.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
125
/**
 * Tells whether compression is enabled for newly stored blobs.
 *
 * @return bool whether blobs should be compressed for storage
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
132
/**
 * Enables or disables compression of newly stored blobs.
 * The value is stored as given, without type checking.
 *
 * @param bool $compressBlobs whether blobs should be compressed for storage
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
139
/**
 * Returns the legacy encoding setting.
 *
 * @return false|string The legacy encoding to assume for blobs that are not marked as utf8.
 *  False means handling of legacy encoding is disabled, and utf8 assumed.
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
147
/**
 * Returns the Language object used for legacy encoding conversion.
 *
 * @return Language|null The locale to use when decoding from a legacy encoding, or null
 *  if handling of legacy encoding is disabled.
 */
public function getLegacyEncodingConversionLang() {
	return $this->legacyEncodingConversionLang;
}
155
/**
 * Enables handling of a legacy encoding for blobs that are not marked as utf8.
 *
 * @param string $legacyEncoding The legacy encoding to assume for blobs that are
 *  not marked as utf8.
 * @param Language $language The locale to use when decoding from a legacy encoding.
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	// The encoding name must be a string; the Language type hint covers $language.
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
167
/**
 * Tells whether newly stored blobs are written via ExternalStore.
 *
 * @return bool Whether to use the ExternalStore mechanism for storing blobs.
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
174
/**
 * Enables or disables use of ExternalStore for newly stored blobs.
 * The value must be a boolean.
 *
 * @param bool $useExternalStore Whether to use the ExternalStore mechanism for storing blobs.
 */
public function setUseExternalStore( $useExternalStore ) {
	// Reject anything that is not a boolean before storing it.
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );
	$this->useExternalStore = $useExternalStore;
}
183
/**
 * Returns the load balancer this store was constructed with.
 *
 * @return LoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
190
/**
 * Acquires a database connection for the wiki this store was configured for.
 *
 * @param int $index A database index, like DB_MASTER or DB_REPLICA
 *
 * @return IDatabase
 */
private function getDBConnection( $index ) {
	// No query groups are requested; the target wiki is the one given to the constructor.
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
200
/**
 * Stores an arbitrary blob of data and returns an address that can be used with
 * getBlob() to retrieve the same blob of data.
 *
 * The data is passed through compressData() and, if use of the external store is
 * enabled, written to ExternalStore first; a row in the text table is inserted in
 * either case.
 *
 * @param string $data
 * @param array $hints An array of hints. Not evaluated by this implementation.
 *
 * @throws BlobAccessException
 * @return string an address that can be used with getBlob() to retrieve the data.
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() takes $data by reference and may replace it with compressed data.
		$flags = $this->compressData( $data );

		if ( $this->useExternalStore ) {
			// Write to external storage; the text table then only holds the returned URL.
			$data = ExternalStore::insertToDefault( $data );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}

			// Append 'external' to the comma separated list of flags.
			$flags .= ( $flags ? ',' : '' ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$textRow = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flags,
		];
		$dbw->insert( 'text', $textRow, __METHOD__ );

		// The address of the new blob is 'tt:' followed by the ID of the inserted row.
		return 'tt:' . $dbw->insertId();
	} catch ( MWException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
252
/**
 * Retrieve a blob, given an address.
 * Currently hardcoded to the 'text' table storage engine.
 *
 * MCR migration note: this replaces Revision::loadText
 *
 * Successfully loaded blobs are cached. Blobs of the local wiki are cached under a
 * wiki-local key; blobs of a foreign wiki are cached under a global key that includes
 * the wiki ID, so that blobs from different wikis never share a cache entry.
 *
 * @param string $blobAddress
 * @param int $queryFlags Bit field of IDBAccessObject::READ_XXX flags
 *
 * @throws BlobAccessException if no blob could be loaded from the given address
 * @return string
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// TODO: change key, since this is not necessarily revision text!
	if ( $this->wikiId === false ) {
		// Local wiki: keep using the wiki-local key.
		$cacheKey = $this->cache->makeKey( 'revisiontext', 'textid', $blobAddress );
	} else {
		// Foreign wiki: a wiki-local key would collide with the local wiki's blob at
		// the same address, so qualify the key with the ID of the target wiki.
		$cacheKey = $this->cache->makeGlobalKey(
			'revisiontext',
			'textid',
			$this->wikiId,
			$blobAddress
		);
	}

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback(
		$cacheKey,
		$this->getCacheTTL(),
		function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
			list( $index ) = DBAccessObjectUtils::getDBOptions( $queryFlags );
			$setOpts += Database::getCacheSetOptions( $this->getDBConnection( $index ) );

			return $this->fetchBlob( $blobAddress, $queryFlags );
		},
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ]
	);

	if ( $blob === false ) {
		throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
	}

	return $blob;
}
288
/**
 * Loads a blob from the text table, bypassing the cache used by getBlob().
 *
 * MCR migration note: this corresponds to Revision::fetchText
 *
 * @param string $blobAddress
 * @param int $queryFlags
 *
 * @throws BlobAccessException if the address has an unknown schema or a bad ID
 * @throws InvalidArgumentException if the address cannot be parsed at all
 * @return string|false The blob, or false if the row is missing or contains bad data
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	//TODO: MCR: also support 'ar' schema for content blobs in old style archive rows!
	if ( $schema !== 'tt' ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	// The ID must be a non-zero integer in canonical decimal notation.
	$textId = intval( $id );
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	if ( DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST ) ) {
		$queryFlags |= self::READ_LATEST_IMMUTABLE;
	}

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	// Both the primary and the fallback query look up the same row.
	$table = 'text';
	$fields = [ 'old_text', 'old_flags' ];
	$conds = [ 'old_id' => $textId ];

	// Text data is immutable; check replica DBs first.
	$row = $this->getDBConnection( $index )
		->selectRow( $table, $fields, $conds, __METHOD__, $options );

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $this->getDBConnection( $fallbackIndex )
			->selectRow( $table, $fields, $conds, __METHOD__, $fallbackOptions );
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	// The blob address doubles as the cache key hint for expandBlob().
	$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );

	if ( $blob === false ) {
		wfWarn( __METHOD__ . ": Bad data in text row $textId." );
	}

	return $blob;
}
359
/**
 * Expand a raw data blob according to the flags given.
 *
 * If the flags contain 'external', $raw is treated as an ExternalStore URL and the
 * actual data is fetched from there; otherwise $raw is the data itself. In both cases
 * the data is passed through decompressData().
 *
 * MCR migration note: this replaces Revision::getRevisionText
 *
 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param string $raw The raw blob data, to be processed according to $flags.
 *  May be the blob itself, or the blob compressed, or just the address
 *  of the actual blob, depending on $flags.
 * @param string|string[] $flags Blob flags, such as 'external' or 'gzip'.
 *  A string is split on commas.
 * @param string|null $cacheKey May be used for caching if given
 *
 * @return false|string The expanded blob or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		// The data is stored inline.
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( count( $urlParts ) == 1 || $urlParts[1] == '' ) {
		// Not of the form "<protocol>://<something>"
		return false;
	}

	// Loads the data from the external store and decompresses it.
	$fetchExternal = function () use ( $url, $flags ) {
		// No negative caching per BlobStore::getBlob()
		$blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );

		return $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey || $this->wikiId !== false ) {
		// No cache key given, or not the local wiki: load without caching.
		return $fetchExternal();
	}

	// Make use of the wiki-local revision text cache.
	// The cached value should be decompressed, so handle that and return here.
	// NOTE: we rely on $this->cache being the right cache for $this->wikiId!
	return $this->cache->getWithSetCallback(
		// TODO: change key, since this is not necessarily revision text!
		$this->cache->makeKey( 'revisiontext', 'textid', $cacheKey ),
		$this->getCacheTTL(),
		$fetchExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
413
/**
 * Compresses the given data if compression of blobs is enabled.
 * The input string is modified in place.
 * Return value is the flags field: always contains 'utf-8', and additionally
 * contains 'gzip' if the data was compressed.
 *
 * MCR migration note: this replaces Revision::compressRevisionText
 *
 * @note direct use is deprecated!
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param mixed &$blob Reference to a text
 *
 * @return string Comma separated list of flags
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. May be risky, though.
	$blobFlags = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		} else {
			$deflated = gzdeflate( $blob );

			if ( $deflated !== false ) {
				$blob = $deflated;
				$blobFlags[] = 'gzip';
			} else {
				// Leave $blob untouched and do not flag it as compressed.
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $blobFlags );
}
453
/**
 * Re-converts revision text according to its flags.
 *
 * Depending on the flags, the data is inflated ('gzip'), unwrapped from a serialized
 * object ('object'), and converted from the legacy encoding (if legacy encoding handling
 * is enabled and the flags contain neither 'utf-8' nor 'utf8').
 *
 * MCR migration note: this replaces Revision::decompressRevisionText
 *
 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param mixed $blob The data to process, or false if it could not be fetched
 * @param array $blobFlags Compression flags
 *
 * @return string|bool Decompressed text, or false on failure
 */
public function decompressData( $blob, $blobFlags ) {
	if ( $blob === false ) {
		// Text failed to be fetched; nothing to do
		return false;
	}

	if ( in_array( 'error', $blobFlags ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfLogWarning( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() can instantiate arbitrary classes. This is only
		// acceptable as long as the stored data is trusted - confirm, and consider
		// restricting the allowed classes.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !is_callable( [ $obj, 'getText' ] ) ) {
			// Invalid object. This includes objects that cannot provide the text,
			// which would otherwise cause a fatal error below.
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
		&& !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
513
/**
 * Get the text cache TTL
 *
 * MCR migration note: this replaces Revision::getCacheTTL
 *
 * @return int The configured cache expiry, or WANObjectCache::TTL_UNCACHEABLE if
 *  blobs should not be cached
 */
private function getCacheTTL() {
	$emulation = $this->cache->getQoS( WANObjectCache::ATTR_EMULATION );

	if ( $emulation <= WANObjectCache::QOS_EMULATION_SQL ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return WANObjectCache::TTL_UNCACHEABLE;
	}

	if ( $this->cacheExpiry ) {
		return $this->cacheExpiry;
	}

	// An expiry of zero means blobs are not cached.
	return WANObjectCache::TTL_UNCACHEABLE;
}
533
/**
 * Returns an ID corresponding to the old_id field in the text table, corresponding
 * to the given $address.
 *
 * Currently, $address must start with 'tt:' followed by a decimal integer representing
 * the old_id; if $address uses a schema other than 'tt', null is returned. However,
 * the implementation may change to insert rows into the text table on the fly.
 *
 * @note This method exists for use with the text table based storage schema.
 * It should not be assumed that it will function with all future kinds of content addresses.
 *
 * @deprecated since 1.31, do not assume that all blob addresses refer to a row in the text
 * table. This method should become private once the relevant refactoring in WikiPage is
 * complete.
 *
 * @param string $address
 *
 * @throws InvalidArgumentException if the address cannot be parsed, or if the ID of a
 *  'tt' address is not a non-zero integer in canonical decimal notation
 * @return int|null
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema === 'tt' ) {
		$textId = intval( $id );

		// Round-trip through int to reject zero and non-canonical IDs.
		if ( !$textId || $id !== (string)$textId ) {
			throw new InvalidArgumentException( "Malformed text_id: $id" );
		}

		return $textId;
	}

	// Not an address pointing to the text table.
	return null;
}
568
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * An address has the form "<schema>:<id>", optionally followed by "?" and a query
 * string. The schema is converted to lower case.
 *
 * @param string $address
 *
 * @throws InvalidArgumentException if $address does not have the expected form
 * @return array [ $schema, $id, $parameters ], with $parameters being an assoc array.
 */
private static function splitBlobAddress( $address ) {
	// The D modifier makes "$" match only at the very end of the subject. Without it,
	// an address with a trailing newline would be accepted as valid.
	if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/D', $address, $m ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	$schema = strtolower( $m[1] );
	$id = $m[2];
	// Group 4 is only set if the address contains a "?" part.
	$parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [];

	return [ $schema, $id, $parameters ];
}
588
589 }