Merge "language: Add generate-normalize-data maintenance script"
[lhc/web/wiklou.git] / includes / Storage / NameTableStore.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 */
20
21 namespace MediaWiki\Storage;
22
23 use IExpiringStore;
24 use Psr\Log\LoggerInterface;
25 use WANObjectCache;
26 use Wikimedia\Assert\Assert;
27 use Wikimedia\Rdbms\Database;
28 use Wikimedia\Rdbms\IDatabase;
29 use Wikimedia\Rdbms\ILoadBalancer;
30 use Wikimedia\Rdbms\LoadBalancer;
31
/**
 * Store for a "name table": a database table that maps integer IDs to name strings.
 *
 * The whole table is loaded in one query and kept both in a per-instance array and in
 * the WAN cache, so that lookups by name or by ID normally do not need a database query.
 *
 * @author Addshore
 * @since 1.31
 */
class NameTableStore {

	/** @var ILoadBalancer Source of database connections for the target wiki */
	private $loadBalancer;

	/** @var WANObjectCache Shared cache holding the whole id => name map */
	private $cache;

	/** @var LoggerInterface */
	private $logger;

	/** @var string[]|null In-process copy of the id => name map; null until first loaded */
	private $tableCache = null;

	/** @var bool|string ID of the target wiki database, or false for the local wiki */
	private $wikiId = false;

	/** @var int TTL passed to the WAN cache when storing the map */
	private $cacheTTL;

	/** @var string Name of the database table */
	private $table;
	/** @var string Name of the ID column */
	private $idField;
	/** @var string Name of the name column */
	private $nameField;
	/** @var null|callable Applied to every name before it is looked up or stored */
	private $normalizationCallback = null;
	/** @var null|callable Applied to the field array before a new row is inserted */
	private $insertCallback = null;

	/**
	 * @param ILoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
	 * @param WANObjectCache $cache A cache manager for caching data. This can be the local
	 *        wiki's default instance even if $wikiId refers to a different wiki, since
	 *        makeGlobalKey() is used to construct a key that allows cached names from
	 *        the same database to be re-used between wikis. For example, enwiki and frwiki will
	 *        use the same cache keys for names from the wikidatawiki database, regardless
	 *        of the cache's default key space.
	 * @param LoggerInterface $logger
	 * @param string $table Name of the database table
	 * @param string $idField Name of the column holding the IDs
	 * @param string $nameField Name of the column holding the names
	 * @param callable|null $normalizationCallback Normalization to be applied to names before being
	 *        saved or queried. This should be a callback that accepts and returns a single string.
	 * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki.
	 * @param callable|null $insertCallback Callback to change insert fields accordingly.
	 *        It receives the array of fields about to be inserted ( [ $nameField => $name ] )
	 *        and must return the array of fields to insert.
	 *        This parameter was introduced in 1.32
	 */
	public function __construct(
		ILoadBalancer $dbLoadBalancer,
		WANObjectCache $cache,
		LoggerInterface $logger,
		$table,
		$idField,
		$nameField,
		callable $normalizationCallback = null,
		$wikiId = false,
		callable $insertCallback = null
	) {
		$this->loadBalancer = $dbLoadBalancer;
		$this->cache = $cache;
		$this->logger = $logger;
		$this->table = $table;
		$this->idField = $idField;
		$this->nameField = $nameField;
		$this->normalizationCallback = $normalizationCallback;
		$this->wikiId = $wikiId;
		$this->cacheTTL = IExpiringStore::TTL_MONTH;
		$this->insertCallback = $insertCallback;
	}

	/**
	 * Get a connection to the database of the wiki given to the constructor.
	 *
	 * @param int $index A database index, like DB_MASTER or DB_REPLICA
	 * @param int $flags Database connection flags
	 *
	 * @return IDatabase
	 */
	private function getDBConnection( $index, $flags = 0 ) {
		return $this->loadBalancer->getConnection( $index, [], $this->wikiId, $flags );
	}

	/**
	 * Gets the cache key for names.
	 *
	 * The cache key is constructed based on the wiki ID passed to the constructor, and allows
	 * sharing of name tables cached for a specific database between wikis.
	 *
	 * @return string
	 */
	private function getCacheKey() {
		return $this->cache->makeGlobalKey(
			'NameTableSqlStore',
			$this->table,
			$this->loadBalancer->resolveDomainID( $this->wikiId )
		);
	}

	/**
	 * Apply the normalization callback, if one was given to the constructor.
	 *
	 * @param string $name
	 * @return string The normalized name, or $name unchanged when there is no callback
	 */
	private function normalizeName( $name ) {
		if ( $this->normalizationCallback === null ) {
			return $name;
		}
		return call_user_func( $this->normalizationCallback, $name );
	}

	/**
	 * Acquire the id of the given name.
	 * This creates a row in the table if it doesn't already exist.
	 *
	 * @param string $name
	 * @throws NameTableAccessException If the insert was ignored but the name could still
	 *         not be found on the master
	 * @return int
	 */
	public function acquireId( $name ) {
		Assert::parameterType( 'string', $name, '$name' );
		$name = $this->normalizeName( $name );

		$table = $this->getTableFromCachesOrReplica();
		$searchResult = array_search( $name, $table, true );
		if ( $searchResult !== false ) {
			// Already known; nothing to insert and no cache to update
			return $searchResult;
		}

		$id = $this->store( $name );
		if ( $id === null ) {
			// RACE: $name was already in the db, probably just inserted, so load from master
			// Use CONN_TRX_AUTOCOMMIT to avoid missing inserts due to other threads or
			// REPEATABLE-READs
			$table = $this->loadTable(
				$this->getDBConnection( DB_MASTER, LoadBalancer::CONN_TRX_AUTOCOMMIT )
			);
			$searchResult = array_search( $name, $table, true );
			if ( $searchResult === false ) {
				// Insert failed due to IGNORE flag, but DB_MASTER didn't give us the data
				$m = "No insert possible but master didn't give us a record for " .
					"'{$name}' in '{$this->table}'";
				$this->logger->error( $m );
				throw new NameTableAccessException( $m );
			}
			$this->purgeWANCache(
				function () {
					$this->cache->reap( $this->getCacheKey(), INF );
				}
			);
		} else {
			$table[$id] = $name;
			$searchResult = $id;
			// As store returned an ID we know we inserted so delete from WAN cache
			$this->purgeWANCache(
				function () {
					$this->cache->delete( $this->getCacheKey() );
				}
			);
		}

		// Remember the updated map for later calls on this instance
		$this->tableCache = $table;

		return $searchResult;
	}

	/**
	 * Get the id of the given name.
	 * If the name doesn't exist this will throw.
	 * This should be used in cases where we believe the name already exists or want to check for
	 * existence.
	 *
	 * @param string $name
	 * @throws NameTableAccessException The name does not exist
	 * @return int Id
	 */
	public function getId( $name ) {
		Assert::parameterType( 'string', $name, '$name' );
		$name = $this->normalizeName( $name );

		$table = $this->getTableFromCachesOrReplica();
		$searchResult = array_search( $name, $table, true );

		if ( $searchResult !== false ) {
			return $searchResult;
		}

		throw NameTableAccessException::newFromDetails( $this->table, 'name', $name );
	}

	/**
	 * Get the name of the given id.
	 * If the id doesn't exist this will throw.
	 * This should be used in cases where we believe the id already exists.
	 *
	 * Note: Calls to this method will result in a master select for non existing IDs.
	 *
	 * @param int $id
	 * @throws NameTableAccessException The id does not exist
	 * @return string name
	 */
	public function getName( $id ) {
		Assert::parameterType( 'integer', $id, '$id' );

		$table = $this->getTableFromCachesOrReplica();
		if ( array_key_exists( $id, $table ) ) {
			return $table[$id];
		}

		// Not in the map we have: re-check the WAN cache, then the replica, then the master
		$table = $this->cache->getWithSetCallback(
			$this->getCacheKey(),
			$this->cacheTTL,
			function ( $oldValue, &$ttl, &$setOpts ) use ( $id ) {
				// Check if cached value is up-to-date enough to have $id
				if ( is_array( $oldValue ) && array_key_exists( $id, $oldValue ) ) {
					// Completely leave the cache key alone
					$ttl = WANObjectCache::TTL_UNCACHEABLE;
					// Use the old value
					return $oldValue;
				}
				$table = [];
				$cacheSetOpts = [];
				// Regenerate from replica DB, and master DB if needed
				foreach ( [ DB_REPLICA, DB_MASTER ] as $source ) {
					// Log a fallback to master
					if ( $source === DB_MASTER ) {
						$this->logger->info(
							__METHOD__ . ': falling back to master select from ' .
							$this->table . ' with id ' . $id
						);
					}
					$db = $this->getDBConnection( $source );
					$cacheSetOpts = Database::getCacheSetOptions( $db );
					$table = $this->loadTable( $db );
					if ( array_key_exists( $id, $table ) ) {
						break; // found it
					}
				}
				// Use the value from last source checked
				$setOpts += $cacheSetOpts;

				return $table;
			},
			[ 'minAsOf' => INF ] // force callback run
		);

		$this->tableCache = $table;

		if ( array_key_exists( $id, $table ) ) {
			return $table[$id];
		}

		throw NameTableAccessException::newFromDetails( $this->table, 'id', $id );
	}

	/**
	 * Get the whole table, in no particular order as a map of ids to names.
	 * This method could be subject to DB or cache lag.
	 *
	 * @return string[] keys are the name ids, values are the names themselves
	 *  Example: [ 1 => 'foo', 3 => 'bar' ]
	 */
	public function getMap() {
		return $this->getTableFromCachesOrReplica();
	}

	/**
	 * Get the id => name map from the per-instance cache if it is already populated,
	 * otherwise from the WAN cache, which in turn loads it from a replica DB on a miss.
	 *
	 * @return string[]
	 */
	private function getTableFromCachesOrReplica() {
		if ( $this->tableCache !== null ) {
			return $this->tableCache;
		}

		$table = $this->cache->getWithSetCallback(
			$this->getCacheKey(),
			$this->cacheTTL,
			function ( $oldValue, &$ttl, &$setOpts ) {
				$dbr = $this->getDBConnection( DB_REPLICA );
				$setOpts += Database::getCacheSetOptions( $dbr );
				return $this->loadTable( $dbr );
			}
		);

		$this->tableCache = $table;

		return $table;
	}

	/**
	 * Run a callback that purges the WAN cache entry for this table, either right away
	 * or, when the load balancer reports master changes, via onTransactionPreCommitOrIdle()
	 * on the master connection.
	 *
	 * @param callable $purgeCallback callback to 'purge' the WAN cache
	 */
	private function purgeWANCache( callable $purgeCallback ) {
		// If the LB has no DB changes don't bother with onTransactionPreCommitOrIdle
		if ( !$this->loadBalancer->hasOrMadeRecentMasterChanges() ) {
			$purgeCallback();
			return;
		}

		$this->getDBConnection( DB_MASTER )
			->onTransactionPreCommitOrIdle( $purgeCallback, __METHOD__ );
	}

	/**
	 * Gets the table from the db
	 *
	 * Selects every row of the table through the given connection, ordered by ID.
	 *
	 * @param IDatabase $db
	 *
	 * @return string[] Map of id => name
	 */
	private function loadTable( IDatabase $db ) {
		$result = $db->select(
			$this->table,
			[
				'id' => $this->idField,
				'name' => $this->nameField
			],
			[],
			__METHOD__,
			[ 'ORDER BY' => 'id' ]
		);

		$assocArray = [];
		foreach ( $result as $row ) {
			$assocArray[$row->id] = $row->name;
		}

		return $assocArray;
	}

	/**
	 * Stores the given name in the DB, returning the ID when an insert occurs.
	 *
	 * The insert is done on the master with the IGNORE option; when no row was affected
	 * the name is assumed to exist already and null is returned.
	 *
	 * @param string $name Already normalized, non-empty name
	 * @return int|null int if we know the ID, null if we don't
	 */
	private function store( $name ) {
		Assert::parameterType( 'string', $name, '$name' );
		Assert::parameter( $name !== '', '$name', 'should not be an empty string' );
		// Note: this is only called internally so normalization of $name has already occurred.

		$dbw = $this->getDBConnection( DB_MASTER );

		$dbw->insert(
			$this->table,
			$this->getFieldsToStore( $name ),
			__METHOD__,
			[ 'IGNORE' ]
		);

		if ( $dbw->affectedRows() === 0 ) {
			$this->logger->info(
				'Tried to insert name into table ' . $this->table . ', but value already existed.'
			);
			return null;
		}

		return $dbw->insertId();
	}

	/**
	 * Build the field array for inserting a new name, passing it through the insert
	 * callback when one was given to the constructor.
	 *
	 * @param string $name
	 * @return array Map of column name => value
	 */
	private function getFieldsToStore( $name ) {
		$fields = [ $this->nameField => $name ];
		if ( $this->insertCallback !== null ) {
			$fields = call_user_func( $this->insertCallback, $fields );
		}
		return $fields;
	}

}