3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
21 namespace MediaWiki\Storage
;
25 use Psr\Log\LoggerInterface
;
27 use Wikimedia\Assert\Assert
;
28 use Wikimedia\Rdbms\Database
;
29 use Wikimedia\Rdbms\IDatabase
;
30 use Wikimedia\Rdbms\ILoadBalancer
;
36 class NameTableStore
{
38 /** @var ILoadBalancer */
39 private $loadBalancer;
41 /** @var WANObjectCache */
44 /** @var LoggerInterface */
48 private $tableCache = null;
50 /** @var bool|string */
51 private $domain = false;
62 /** @var null|callable */
63 private $normalizationCallback = null;
64 /** @var null|callable */
65 private $insertCallback = null;
68 * @param ILoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
69 * @param WANObjectCache $cache A cache manager for caching data. This can be the local
70 * wiki's default instance even if $dbDomain refers to a different wiki, since
71 * makeGlobalKey() is used to construct a key that allows cached names from
72 * the same database to be re-used between wikis. For example, enwiki and frwiki will
73 * use the same cache keys for names from the wikidatawiki database, regardless
74 * of the cache's default key space.
75 * @param LoggerInterface $logger
76 * @param string $table
77 * @param string $idField
78 * @param string $nameField
79 * @param callable|null $normalizationCallback Normalization to be applied to names before being
80 * saved or queried. This should be a callback that accepts and returns a single string.
81 * @param bool|string $dbDomain Database domain ID. Use false for the local database domain.
82 * @param callable|null $insertCallback Callback to change insert fields accordingly.
83 * This parameter was introduced in 1.32
// Constructor: copies the injected services and settings onto the instance.
// NOTE(review): $table, $idField, $nameField and $dbDomain are assigned below but
// their parameter declarations are not visible in this view of the signature —
// presumably they sit on lines missing here; confirm against the full file.
85 public function __construct(
86 ILoadBalancer
$dbLoadBalancer,
87 WANObjectCache
$cache,
88 LoggerInterface
$logger,
92 callable
$normalizationCallback = null,
94 callable
$insertCallback = null
96 $this->loadBalancer
= $dbLoadBalancer;
97 $this->cache
= $cache;
98 $this->logger
= $logger;
99 $this->table
= $table;
100 $this->idField
= $idField;
101 $this->nameField
= $nameField;
102 $this->normalizationCallback
= $normalizationCallback;
103 $this->domain
= $dbDomain;
// The cache TTL is not taken from an argument; it is always IExpiringStore::TTL_MONTH.
104 $this->cacheTTL
= IExpiringStore
::TTL_MONTH
;
105 $this->insertCallback
= $insertCallback;
109 * @param int $index A database index, like DB_MASTER or DB_REPLICA
110 * @param int $flags Database connection flags
// Fetches a connection handle from the load balancer via getConnectionRef(),
// always bound to $this->domain. The second argument is an empty array
// (presumably "no query groups" — confirm against ILoadBalancer).
114 private function getDBConnection( $index, $flags = 0 ) {
115 return $this->loadBalancer
->getConnectionRef( $index, [], $this->domain
, $flags );
119 * Gets the cache key for names.
121 * The cache key is constructed based on the wiki ID passed to the constructor, and allows
122 * sharing of name tables cached for a specific database between wikis.
// Builds the cache key with makeGlobalKey(); one component is the domain ID that
// the load balancer resolves from $this->domain.
// NOTE(review): any other key components are on lines not visible in this view.
126 private function getCacheKey() {
127 return $this->cache
->makeGlobalKey(
130 $this->loadBalancer
->resolveDomainID( $this->domain
)
135 * @param string $name
// Passes $name through the configured normalization callback and returns the result.
// NOTE(review): the body of the "no callback configured" branch is not visible in
// this view; presumably it returns $name unchanged — confirm.
138 private function normalizeName( $name ) {
139 if ( $this->normalizationCallback
=== null ) {
142 return call_user_func( $this->normalizationCallback
, $name );
146 * Acquire the id of the given name.
147 * This creates a row in the table if it doesn't already exist.
149 * @note If called within an atomic section, there is a chance for the acquired ID
150 * to be lost on rollback. A best effort is made to re-insert the mapping
151 * in this case, and consistency of the cache with the database table is ensured
152 * by re-loading the map after a failed atomic section. However, there is no guarantee
153 * that an ID returned by this method is valid outside the transaction in which it
154 * was produced. This means that calling code should not retain the return value beyond
155 * the scope of a transaction, but rather call acquireId() again after the transaction
156 * is complete. In some rare cases, this may produce an ID different from the first call.
158 * @param string $name
159 * @throws NameTableAccessException
// Resolves the (normalized) $name to its ID using the cached table; when the name is
// not found, tries to insert it via store() and falls back to reloading the map from
// the master when store() reports that no insert happened.
// NOTE(review): some statements of this method (e.g. where the new ID is written into
// $table / $searchResult) are not visible in this view.
162 public function acquireId( $name ) {
163 Assert
::parameterType( 'string', $name, '$name' );
164 $name = $this->normalizeName( $name );
166 $table = $this->getTableFromCachesOrReplica();
// $table is keyed by ID with names as values, so a strict array_search() yields
// the ID of $name, or false when the name is unknown.
167 $searchResult = array_search( $name, $table, true );
168 if ( $searchResult === false ) {
169 $id = $this->store( $name );
170 if ( $id === null ) {
171 // RACE: $name was already in the db, probably just inserted, so load from master.
172 // Use DBO_TRX to avoid missing inserts due to other threads or REPEATABLE-READs.
// Note: the flag actually passed below is ILoadBalancer::CONN_TRX_AUTOCOMMIT.
173 $table = $this->reloadMap( ILoadBalancer
::CONN_TRX_AUTOCOMMIT
);
175 $searchResult = array_search( $name, $table, true );
176 if ( $searchResult === false ) {
177 // Insert failed due to IGNORE flag, but DB_MASTER didn't give us the data
178 $m = "No insert possible but master didn't give us a record for " .
179 "'{$name}' in '{$this->table}'";
180 $this->logger
->error( $m );
181 throw new NameTableAccessException( $m );
// A freshly inserted ID that is already present in the cached table means the
// cached entry is stale; this is logged as a warning, not treated as an error.
184 if ( isset( $table[$id] ) ) {
185 // This can happen when a transaction is rolled back and acquireId is called in
186 // an onTransactionResolution() callback, which gets executed before retryStore()
187 // has a chance to run. The right thing to do in this case is to discard the old
188 // value. According to the contract of acquireId, the caller should not have
189 // used it outside the transaction, so it should not be persisted anywhere after
191 $m = "Got ID $id for '$name' from insert"
192 . " into '{$this->table}', but ID $id was previously associated with"
193 . " the name '{$table[$id]}'. Overriding the old value, which presumably"
194 . " has been removed from the database due to a transaction rollback.";
196 $this->logger
->warning( $m );
202 // As store returned an ID we know we inserted so delete from WAN cache
203 $dbw = $this->getDBConnection( DB_MASTER
);
// The WAN cache delete is registered through onTransactionPreCommitOrIdle()
// instead of being executed directly at this point.
204 $dbw->onTransactionPreCommitOrIdle( function () {
205 $this->cache
->delete( $this->getCacheKey() );
// Remember the table on the instance; getTableFromCachesOrReplica() returns this
// copy directly when it is set.
208 $this->tableCache
= $table;
211 return $searchResult;
215 * Reloads the name table from the master database, and purges the WAN cache entry.
217 * @note This should only be called in situations where the local cache has been detected
218 * to be out of sync with the database. There should be no reason to call this method
219 * from outside the NameTableStore during normal operation. This method may however be
220 * useful in unit tests.
222 * @param int $connFlags ILoadBalancer::CONN_XXX flags. Optional.
224 * @return string[] The freshly reloaded name map
// Re-reads the whole table from a DB_MASTER connection into the in-process cache,
// registers a reap() of the WAN cache key, and returns the freshly loaded map.
226 public function reloadMap( $connFlags = 0 ) {
// NOTE(review): the statement inside this branch is not visible in this view;
// presumably it resets $connFlags to 0 under PHPUnit — confirm.
227 if ( $connFlags !== 0 && defined( 'MW_PHPUNIT_TEST' ) ) {
228 // HACK: We can't use $connFlags while doing PHPUnit tests, because the
229 // fake database tables are bound to a single connection.
233 $dbw = $this->getDBConnection( DB_MASTER
, $connFlags );
234 $this->tableCache
= $this->loadTable( $dbw );
// The WAN cache reap is registered through onTransactionPreCommitOrIdle().
// INF is passed as the second reap() argument (presumably a purge timestamp so
// that any cached value counts as stale — confirm against WANObjectCache).
235 $dbw->onTransactionPreCommitOrIdle( function () {
236 $this->cache
->reap( $this->getCacheKey(), INF
);
239 return $this->tableCache
;
243 * Get the id of the given name.
244 * If the name doesn't exist this will throw.
245 * This should be used in cases where we believe the name already exists or want to check for
248 * @param string $name
249 * @throws NameTableAccessException The name does not exist
// Read-only lookup: returns the ID for the (normalized) $name from the cached table,
// and throws NameTableAccessException when the name is not present. Unlike
// acquireId(), nothing is inserted.
252 public function getId( $name ) {
253 Assert
::parameterType( 'string', $name, '$name' );
254 $name = $this->normalizeName( $name );
256 $table = $this->getTableFromCachesOrReplica();
// Strict search over an ID => name map: the matching key is the ID.
257 $searchResult = array_search( $name, $table, true );
259 if ( $searchResult !== false ) {
260 return $searchResult;
263 throw NameTableAccessException
::newFromDetails( $this->table
, 'name', $name );
267 * Get the name of the given id.
268 * If the id doesn't exist this will throw.
269 * This should be used in cases where we believe the id already exists.
271 * Note: Calls to this method will result in a master select for non existing IDs.
274 * @throws NameTableAccessException The id does not exist
275 * @return string name
// Returns the name stored for $id. Checks the cached table first; when the ID is
// missing there, forces the WAN cache callback to run, which reloads the table from
// the replica and then, if still missing, from the master. Throws
// NameTableAccessException when the ID cannot be found.
// NOTE(review): several lines (the early returns, the assignment of $fname that the
// closure captures, and the logger call around the fallback message) are not visible
// in this view.
277 public function getName( $id ) {
// $id must be a PHP integer; a numeric string would fail this assertion.
278 Assert
::parameterType( 'integer', $id, '$id' );
280 $table = $this->getTableFromCachesOrReplica();
281 if ( array_key_exists( $id, $table ) ) {
286 $table = $this->cache
->getWithSetCallback(
287 $this->getCacheKey(),
289 function ( $oldValue, &$ttl, &$setOpts ) use ( $id, $fname ) {
290 // Check if cached value is up-to-date enough to have $id
291 if ( is_array( $oldValue ) && array_key_exists( $id, $oldValue ) ) {
292 // Completely leave the cache key alone
293 $ttl = WANObjectCache
::TTL_UNCACHEABLE
;
297 // Regenerate from replica DB, and master DB if needed
// Sources are tried in order: replica first, master only as a fallback.
298 foreach ( [ DB_REPLICA
, DB_MASTER
] as $source ) {
299 // Log a fallback to master
300 if ( $source === DB_MASTER
) {
302 $fname . ' falling back to master select from ' .
303 $this->table
. ' with id ' . $id
306 $db = $this->getDBConnection( $source );
307 $cacheSetOpts = Database
::getCacheSetOptions( $db );
308 $table = $this->loadTable( $db );
309 if ( array_key_exists( $id, $table ) ) {
313 // Use the value from last source checked
314 $setOpts +
= $cacheSetOpts;
318 [ 'minAsOf' => INF
] // force callback run
// Keep whatever table the cache call produced as the in-process copy.
321 $this->tableCache
= $table;
323 if ( array_key_exists( $id, $table ) ) {
327 throw NameTableAccessException
::newFromDetails( $this->table
, 'id', $id );
331 * Get the whole table, in no particular order as a map of ids to names.
332 * This method could be subject to DB or cache lag.
334 * @return string[] keys are the name ids, values are the names themselves
335 * Example: [ 1 => 'foo', 3 => 'bar' ]
// Public accessor for the whole map; simply delegates to
// getTableFromCachesOrReplica(), so it never forces a read from the master.
337 public function getMap() {
338 return $this->getTableFromCachesOrReplica();
// Returns the table from the cheapest available source: the in-process copy if set,
// otherwise the WAN cache, whose regeneration callback loads it from a DB_REPLICA
// connection. The result is stored as the in-process copy.
// NOTE(review): the TTL argument of getWithSetCallback() and the final return are
// not visible in this view.
344 private function getTableFromCachesOrReplica() {
345 if ( $this->tableCache
!== null ) {
346 return $this->tableCache
;
349 $table = $this->cache
->getWithSetCallback(
350 $this->getCacheKey(),
352 function ( $oldValue, &$ttl, &$setOpts ) {
353 $dbr = $this->getDBConnection( DB_REPLICA
);
// Merge the set options derived from the replica connection into $setOpts.
354 $setOpts +
= Database
::getCacheSetOptions( $dbr );
355 return $this->loadTable( $dbr );
359 $this->tableCache
= $table;
365 * Gets the table from the db
367 * @param IDatabase $db
// Selects the ID and name columns from the given connection, aliased as 'id' and
// 'name' and ordered by 'id', and builds an array keyed by ID with the name as value.
// NOTE(review): the table argument of select(), the initialisation of $assocArray
// and the return statement are not visible in this view.
371 private function loadTable( IDatabase
$db ) {
372 $result = $db->select(
375 'id' => $this->idField
,
376 'name' => $this->nameField
380 [ 'ORDER BY' => 'id' ]
384 foreach ( $result as $row ) {
385 $assocArray[$row->id
] = $row->name
;
392 * Stores the given name in the DB, returning the ID when an insert occurs.
394 * @param string $name
395 * @return int|null int if we know the ID, null if we don't
// Inserts $name inside a cancelable atomic section on a DB_MASTER connection and
// captures the new row's ID through the by-reference $id. $name must be a non-empty
// string. Also registers retryStore() to run if the atomic section is cancelled.
// NOTE(review): the insert call itself, the initialisation of $id, the handling
// after the "already existed" message and the return are not visible in this view.
397 private function store( $name ) {
398 Assert
::parameterType( 'string', $name, '$name' );
399 Assert
::parameter( $name !== '', '$name', 'should not be an empty string' );
400 // Note: this is only called internally so normalization of $name has already occurred.
402 $dbw = $this->getDBConnection( DB_MASTER
);
405 $dbw->doAtomicSection(
407 function ( IDatabase
$unused, $fname )
408 use ( $name, &$id, $dbw ) {
409 // NOTE: use IDatabase from the parent scope here, not the function parameter.
410 // If $dbw is a wrapper around the actual DB, we need to call the wrapper here,
411 // not the inner instance.
414 $this->getFieldsToStore( $name ),
// Zero affected rows is treated as "the value already existed".
419 if ( $dbw->affectedRows() === 0 ) {
421 'Tried to insert name into table ' . $this->table
. ', but value already existed.'
427 $id = $dbw->insertId();
429 // Any open transaction may still be rolled back. If that happens, we have to re-try the
430 // insertion and restore a consistent state of the cached table.
431 $dbw->onAtomicSectionCancel(
432 function ( $trigger, IDatabase
$unused ) use ( $name, $id, $dbw ) {
433 $this->retryStore( $dbw, $name, $id );
437 IDatabase
::ATOMIC_CANCELABLE
444 * After the initial insertion got rolled back, this can be used to try the insertion again,
445 * and ensure a consistent state of the cache.
447 * @param IDatabase $dbw
448 * @param string $name
// Second attempt after the original insert was rolled back: tries to re-insert $name
// with the ID obtained earlier inside a new cancelable atomic section, logs any
// exception instead of propagating it, and finally reloads the map from the master.
// NOTE(review): the insert call, the `try {` opener and the handling of $ok are not
// visible in this view. `Exception` is written unqualified in the catch clause; in
// this namespace that only refers to the global class if it is imported — the import
// is not visible here, confirm.
451 private function retryStore( IDatabase
$dbw, $name, $id ) {
452 // NOTE: in the closure below, use the IDatabase from the original method call,
453 // not the one passed to the closure as a parameter.
454 // If $dbw is a wrapper around the actual DB, we need to call the wrapper,
455 // not the inner instance.
458 $dbw->doAtomicSection(
460 function ( IDatabase
$unused, $fname ) use ( $name, $id, &$ok, $dbw ) {
461 // Try to insert a row with the ID we originally got.
462 // If that fails (because of a key conflict), we will just try to get another ID again later.
465 $this->getFieldsToStore( $name, $id ),
469 // Make sure we re-load the map in case this gets rolled back again.
470 // We could re-try once more, but that bears the risk of an infinite loop.
471 // So let's just give up on the ID.
472 $dbw->onAtomicSectionCancel(
473 function ( $trigger, IDatabase
$unused ) use ( $name, $id, $dbw ) {
474 $this->logger
->warning(
475 'Re-insertion of name into table ' . $this->table
476 . ' was rolled back. Giving up and reloading the cache.'
478 $this->reloadMap( ILoadBalancer
::CONN_TRX_AUTOCOMMIT
);
484 'Re-insert name into table ' . $this->table
. ' after failed transaction.'
487 IDatabase
::ATOMIC_CANCELABLE
// Failures of the re-insert are logged and swallowed; the reload below runs either way.
489 } catch ( Exception
$ex ) {
490 $this->logger
->error(
491 'Re-insertion of name into table ' . $this->table
. ' failed: ' . $ex->getMessage()
494 // NOTE: we reload regardless of whether the above insert succeeded. There is
495 // only three possibilities: the insert succeeded, so the new map will have
496 // the desired $id/$name mapping. Or the insert failed because another
497 // process already inserted that same $id/$name mapping, in which case the
498 // new map will also have it. Or another process grabbed the desired ID for
499 // another name, or the database refuses to insert the given ID into the
500 // auto increment field - in that case, the new map will not have a mapping
501 // for $name (or has a different mapping for $name). In that last case, we can
502 // only hope that the ID produced within the failed transaction has not been
503 // used outside that transaction.
505 $this->reloadMap( ILoadBalancer
::CONN_TRX_AUTOCOMMIT
);
510 * @param string $name
511 * @param int|null $id
// Builds the column => value array for an insert: always the name column, plus the
// ID column when an explicit $id is given. If an insert callback is configured, the
// array is replaced by whatever that callback returns.
// NOTE(review): the initialisation of $fields and the return statement are not
// visible in this view.
514 private function getFieldsToStore( $name, $id = null ) {
517 $fields[$this->nameField
] = $name;
// Only set the ID column when re-inserting with a known ID.
519 if ( $id !== null ) {
520 $fields[$this->idField
] = $id;
523 if ( $this->insertCallback
!== null ) {
524 $fields = call_user_func( $this->insertCallback
, $fields );