NameTableStore: ensure consistency upon rollback.
[lhc/web/wiklou.git] / includes / Storage / NameTableStore.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 */
20
21 namespace MediaWiki\Storage;
22
23 use Exception;
24 use IExpiringStore;
25 use Psr\Log\LoggerInterface;
26 use WANObjectCache;
27 use Wikimedia\Assert\Assert;
28 use Wikimedia\Rdbms\Database;
29 use Wikimedia\Rdbms\IDatabase;
30 use Wikimedia\Rdbms\ILoadBalancer;
31
/**
 * Store for a database table that maps numeric IDs to names.
 *
 * The complete table is loaded as an ID => name map and kept both in-process
 * and in the WAN object cache. Names that do not exist yet can be inserted on
 * demand via acquireId().
 *
 * @author Addshore
 * @since 1.31
 */
class NameTableStore {

	/** @var ILoadBalancer */
	private $loadBalancer;

	/** @var WANObjectCache */
	private $cache;

	/** @var LoggerInterface */
	private $logger;

	/** @var string[]|null In-process copy of the table (ID => name), null until first loaded */
	private $tableCache = null;

	/** @var bool|string Database domain ID, false for the local database domain */
	private $domain = false;

	/** @var int TTL passed to the WAN cache when caching the table */
	private $cacheTTL;

	/** @var string Name of the database table */
	private $table;
	/** @var string Name of the ID field in $table */
	private $idField;
	/** @var string Name of the name field in $table */
	private $nameField;
	/** @var null|callable Applied to names before they are saved or looked up */
	private $normalizationCallback = null;
	/** @var null|callable Applied to the field array before a row is inserted */
	private $insertCallback = null;

	/**
	 * @param ILoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
	 * @param WANObjectCache $cache A cache manager for caching data. This can be the local
	 *        wiki's default instance even if $dbDomain refers to a different wiki, since
	 *        makeGlobalKey() is used to construct a key that allows cached names from
	 *        the same database to be re-used between wikis. For example, enwiki and frwiki will
	 *        use the same cache keys for names from the wikidatawiki database, regardless
	 *        of the cache's default key space.
	 * @param LoggerInterface $logger
	 * @param string $table
	 * @param string $idField
	 * @param string $nameField
	 * @param callable|null $normalizationCallback Normalization to be applied to names before being
	 *        saved or queried. This should be a callback that accepts and returns a single string.
	 * @param bool|string $dbDomain Database domain ID. Use false for the local database domain.
	 * @param callable|null $insertCallback Callback to change insert fields accordingly.
	 *        This parameter was introduced in 1.32
	 */
	public function __construct(
		ILoadBalancer $dbLoadBalancer,
		WANObjectCache $cache,
		LoggerInterface $logger,
		$table,
		$idField,
		$nameField,
		callable $normalizationCallback = null,
		$dbDomain = false,
		callable $insertCallback = null
	) {
		$this->loadBalancer = $dbLoadBalancer;
		$this->cache = $cache;
		$this->logger = $logger;
		$this->table = $table;
		$this->idField = $idField;
		$this->nameField = $nameField;
		$this->normalizationCallback = $normalizationCallback;
		$this->domain = $dbDomain;
		$this->cacheTTL = IExpiringStore::TTL_MONTH;
		$this->insertCallback = $insertCallback;
	}

	/**
	 * @param int $index A database index, like DB_MASTER or DB_REPLICA
	 * @param int $flags Database connection flags
	 *
	 * @return IDatabase
	 */
	private function getDBConnection( $index, $flags = 0 ) {
		return $this->loadBalancer->getConnectionRef( $index, [], $this->domain, $flags );
	}

	/**
	 * Gets the cache key for names.
	 *
	 * The cache key is constructed based on the wiki ID passed to the constructor, and allows
	 * sharing of name tables cached for a specific database between wikis.
	 *
	 * @return string
	 */
	private function getCacheKey() {
		return $this->cache->makeGlobalKey(
			'NameTableSqlStore',
			$this->table,
			$this->loadBalancer->resolveDomainID( $this->domain )
		);
	}

	/**
	 * Applies the normalization callback, if one was given to the constructor.
	 *
	 * @param string $name
	 * @return string The normalized name, or $name unchanged if there is no callback
	 */
	private function normalizeName( $name ) {
		if ( $this->normalizationCallback === null ) {
			return $name;
		}
		return call_user_func( $this->normalizationCallback, $name );
	}

	/**
	 * Acquire the id of the given name.
	 * This creates a row in the table if it doesn't already exist.
	 *
	 * @note If called within an atomic section, there is a chance for the acquired ID
	 * to be lost on rollback. A best effort is made to re-insert the mapping
	 * in this case, and consistency of the cache with the database table is ensured
	 * by re-loading the map after a failed atomic section. However, there is no guarantee
	 * that an ID returned by this method is valid outside the transaction in which it
	 * was produced. This means that calling code should not retain the return value beyond
	 * the scope of a transaction, but rather call acquireId() again after the transaction
	 * is complete. In some rare cases, this may produce an ID different from the first call.
	 *
	 * @param string $name
	 * @throws NameTableAccessException
	 * @return int
	 */
	public function acquireId( $name ) {
		Assert::parameterType( 'string', $name, '$name' );
		$name = $this->normalizeName( $name );

		$table = $this->getTableFromCachesOrReplica();
		$searchResult = array_search( $name, $table, true );
		if ( $searchResult === false ) {
			$id = $this->store( $name );
			if ( $id === null ) {
				// RACE: $name was already in the db, probably just inserted, so load from master.
				// Use CONN_TRX_AUTOCOMMIT to avoid missing inserts due to other threads or
				// REPEATABLE-READs.
				$table = $this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT );

				$searchResult = array_search( $name, $table, true );
				if ( $searchResult === false ) {
					// Insert failed due to IGNORE flag, but DB_MASTER didn't give us the data
					$m = "No insert possible but master didn't give us a record for " .
						"'{$name}' in '{$this->table}'";
					$this->logger->error( $m );
					throw new NameTableAccessException( $m );
				}
			} else {
				if ( isset( $table[$id] ) ) {
					// This can happen when a transaction is rolled back and acquireId is called in
					// an onTransactionResolution() callback, which gets executed before retryStore()
					// has a chance to run. The right thing to do in this case is to discard the old
					// value. According to the contract of acquireId, the caller should not have
					// used it outside the transaction, so it should not be persisted anywhere after
					// the rollback.
					$m = "Got ID $id for '$name' from insert"
						. " into '{$this->table}', but ID $id was previously associated with"
						. " the name '{$table[$id]}'. Overriding the old value, which presumably"
						. " has been removed from the database due to a transaction rollback.";

					$this->logger->warning( $m );
				}

				$table[$id] = $name;
				$searchResult = $id;

				// As store returned an ID we know we inserted so delete from WAN cache
				$dbw = $this->getDBConnection( DB_MASTER );
				$dbw->onTransactionPreCommitOrIdle(
					function () {
						$this->cache->delete( $this->getCacheKey() );
					},
					// Attribute the callback to this method rather than to the database class
					__METHOD__
				);
			}
			$this->tableCache = $table;
		}

		return $searchResult;
	}

	/**
	 * Reloads the name table from the master database, and purges the WAN cache entry.
	 *
	 * @note This should only be called in situations where the local cache has been detected
	 * to be out of sync with the database. There should be no reason to call this method
	 * from outside the NameTableStore during normal operation. This method may however be
	 * useful in unit tests.
	 *
	 * @param int $connFlags ILoadBalancer::CONN_XXX flags. Optional.
	 *
	 * @return string[] The freshly reloaded name map
	 */
	public function reloadMap( $connFlags = 0 ) {
		if ( $connFlags !== 0 && defined( 'MW_PHPUNIT_TEST' ) ) {
			// HACK: We can't use $connFlags while doing PHPUnit tests, because the
			// fake database tables are bound to a single connection.
			$connFlags = 0;
		}

		$dbw = $this->getDBConnection( DB_MASTER, $connFlags );
		$this->tableCache = $this->loadTable( $dbw );
		$dbw->onTransactionPreCommitOrIdle(
			function () {
				$this->cache->reap( $this->getCacheKey(), INF );
			},
			// Attribute the callback to this method rather than to the database class
			__METHOD__
		);

		return $this->tableCache;
	}

	/**
	 * Get the id of the given name.
	 * If the name doesn't exist this will throw.
	 * This should be used in cases where we believe the name already exists or want to check for
	 * existence.
	 *
	 * @param string $name
	 * @throws NameTableAccessException The name does not exist
	 * @return int Id
	 */
	public function getId( $name ) {
		Assert::parameterType( 'string', $name, '$name' );
		$name = $this->normalizeName( $name );

		$table = $this->getTableFromCachesOrReplica();
		$searchResult = array_search( $name, $table, true );

		if ( $searchResult !== false ) {
			return $searchResult;
		}

		throw NameTableAccessException::newFromDetails( $this->table, 'name', $name );
	}

	/**
	 * Get the name of the given id.
	 * If the id doesn't exist this will throw.
	 * This should be used in cases where we believe the id already exists.
	 *
	 * Note: Calls to this method will result in a master select for non existing IDs.
	 *
	 * @param int $id
	 * @throws NameTableAccessException The id does not exist
	 * @return string name
	 */
	public function getName( $id ) {
		Assert::parameterType( 'integer', $id, '$id' );

		$table = $this->getTableFromCachesOrReplica();
		if ( array_key_exists( $id, $table ) ) {
			return $table[$id];
		}
		$fname = __METHOD__;

		$table = $this->cache->getWithSetCallback(
			$this->getCacheKey(),
			$this->cacheTTL,
			function ( $oldValue, &$ttl, &$setOpts ) use ( $id, $fname ) {
				// Check if cached value is up-to-date enough to have $id
				if ( is_array( $oldValue ) && array_key_exists( $id, $oldValue ) ) {
					// Completely leave the cache key alone
					$ttl = WANObjectCache::TTL_UNCACHEABLE;
					// Use the old value
					return $oldValue;
				}
				// Both are always overwritten by the first loop iteration; they are
				// initialised here only so that they are visibly defined after the loop.
				$table = [];
				$cacheSetOpts = [];
				// Regenerate from replica DB, and master DB if needed
				foreach ( [ DB_REPLICA, DB_MASTER ] as $source ) {
					// Log a fallback to master
					if ( $source === DB_MASTER ) {
						$this->logger->info(
							$fname . ' falling back to master select from ' .
							$this->table . ' with id ' . $id
						);
					}
					$db = $this->getDBConnection( $source );
					$cacheSetOpts = Database::getCacheSetOptions( $db );
					$table = $this->loadTable( $db );
					if ( array_key_exists( $id, $table ) ) {
						break; // found it
					}
				}
				// Use the value from last source checked
				$setOpts += $cacheSetOpts;

				return $table;
			},
			[ 'minAsOf' => INF ] // force callback run
		);

		$this->tableCache = $table;

		if ( array_key_exists( $id, $table ) ) {
			return $table[$id];
		}

		throw NameTableAccessException::newFromDetails( $this->table, 'id', $id );
	}

	/**
	 * Get the whole table, in no particular order as a map of ids to names.
	 * This method could be subject to DB or cache lag.
	 *
	 * @return string[] keys are the name ids, values are the names themselves
	 *  Example: [ 1 => 'foo', 3 => 'bar' ]
	 */
	public function getMap() {
		return $this->getTableFromCachesOrReplica();
	}

	/**
	 * Returns the in-process copy of the table if there is one; otherwise fetches the
	 * table through the WAN cache (loading it from a replica on a cache miss) and
	 * remembers it in-process.
	 *
	 * @return string[]
	 */
	private function getTableFromCachesOrReplica() {
		if ( $this->tableCache !== null ) {
			return $this->tableCache;
		}

		$table = $this->cache->getWithSetCallback(
			$this->getCacheKey(),
			$this->cacheTTL,
			function ( $oldValue, &$ttl, &$setOpts ) {
				$dbr = $this->getDBConnection( DB_REPLICA );
				$setOpts += Database::getCacheSetOptions( $dbr );
				return $this->loadTable( $dbr );
			}
		);

		$this->tableCache = $table;

		return $table;
	}

	/**
	 * Gets the table from the db
	 *
	 * @param IDatabase $db
	 *
	 * @return string[] Map of ID => name for every row selected from the table
	 */
	private function loadTable( IDatabase $db ) {
		$result = $db->select(
			$this->table,
			[
				'id' => $this->idField,
				'name' => $this->nameField
			],
			[],
			__METHOD__,
			[ 'ORDER BY' => 'id' ]
		);

		$assocArray = [];
		foreach ( $result as $row ) {
			$assocArray[$row->id] = $row->name;
		}

		return $assocArray;
	}

	/**
	 * Stores the given name in the DB, returning the ID when an insert occurs.
	 *
	 * @param string $name
	 * @return int|null int if we know the ID, null if we don't
	 */
	private function store( $name ) {
		Assert::parameterType( 'string', $name, '$name' );
		Assert::parameter( $name !== '', '$name', 'should not be an empty string' );
		// Note: this is only called internally so normalization of $name has already occurred.

		$dbw = $this->getDBConnection( DB_MASTER );

		$id = null;
		$dbw->doAtomicSection(
			__METHOD__,
			function ( IDatabase $unused, $fname )
			use ( $name, &$id, $dbw ) {
				// NOTE: use IDatabase from the parent scope here, not the function parameter.
				// If $dbw is a wrapper around the actual DB, we need to call the wrapper here,
				// not the inner instance.
				$dbw->insert(
					$this->table,
					$this->getFieldsToStore( $name ),
					$fname,
					[ 'IGNORE' ]
				);

				if ( $dbw->affectedRows() === 0 ) {
					$this->logger->info(
						'Tried to insert name into table ' . $this->table . ', but value already existed.'
					);

					return;
				}

				$id = $dbw->insertId();

				// Any open transaction may still be rolled back. If that happens, we have to re-try the
				// insertion and restore a consistent state of the cached table.
				$dbw->onAtomicSectionCancel(
					function ( $trigger, IDatabase $unused ) use ( $name, $id, $dbw ) {
						$this->retryStore( $dbw, $name, $id );
					},
					$fname
				);
			},
			IDatabase::ATOMIC_CANCELABLE
		);

		return $id;
	}

	/**
	 * After the initial insertion got rolled back, this can be used to try the insertion again,
	 * and ensure a consistent state of the cache.
	 *
	 * Failures of the re-insertion are logged and swallowed; the map is reloaded from the
	 * master in every case.
	 *
	 * @param IDatabase $dbw
	 * @param string $name
	 * @param int $id
	 */
	private function retryStore( IDatabase $dbw, $name, $id ) {
		// NOTE: in the closure below, use the IDatabase from the original method call,
		// not the one passed to the closure as a parameter.
		// If $dbw is a wrapper around the actual DB, we need to call the wrapper,
		// not the inner instance.

		try {
			$dbw->doAtomicSection(
				__METHOD__,
				function ( IDatabase $unused, $fname ) use ( $name, $id, $dbw ) {
					// Try to insert a row with the ID we originally got.
					// If that fails (because of a key conflict), we will just try to get another ID again later.
					$dbw->insert(
						$this->table,
						$this->getFieldsToStore( $name, $id ),
						$fname
					);

					// Make sure we re-load the map in case this gets rolled back again.
					// We could re-try once more, but that bears the risk of an infinite loop.
					// So let's just give up on the ID.
					$dbw->onAtomicSectionCancel(
						function ( $trigger, IDatabase $unused ) {
							$this->logger->warning(
								'Re-insertion of name into table ' . $this->table
								. ' was rolled back. Giving up and reloading the cache.'
							);
							$this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT );
						},
						$fname
					);

					$this->logger->info(
						'Re-insert name into table ' . $this->table . ' after failed transaction.'
					);
				},
				IDatabase::ATOMIC_CANCELABLE
			);
		} catch ( Exception $ex ) {
			$this->logger->error(
				'Re-insertion of name into table ' . $this->table . ' failed: ' . $ex->getMessage()
			);
		} finally {
			// NOTE: we reload regardless of whether the above insert succeeded. There are
			// only three possibilities: the insert succeeded, so the new map will have
			// the desired $id/$name mapping. Or the insert failed because another
			// process already inserted that same $id/$name mapping, in which case the
			// new map will also have it. Or another process grabbed the desired ID for
			// another name, or the database refuses to insert the given ID into the
			// auto increment field - in that case, the new map will not have a mapping
			// for $name (or has a different mapping for $name). In that last case, we can
			// only hope that the ID produced within the failed transaction has not been
			// used outside that transaction.

			$this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT );
		}
	}

	/**
	 * Builds the field => value array for inserting a name row.
	 *
	 * @param string $name
	 * @param int|null $id Explicit ID to insert; the ID field is left out when this is null
	 * @return array Fields to insert, after the insert callback (if any) has been applied
	 */
	private function getFieldsToStore( $name, $id = null ) {
		$fields = [];

		$fields[$this->nameField] = $name;

		if ( $id !== null ) {
			$fields[$this->idField] = $id;
		}

		if ( $this->insertCallback !== null ) {
			$fields = call_user_func( $this->insertCallback, $fields );
		}
		return $fields;
	}

}
529 }