99556ed4f8c259ea9bc1cece43dff5cdf992fc5a
[lhc/web/wiklou.git] / includes / poolcounter / PoolCounterRedis.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @author Aaron Schulz
20 */
21 use Psr\Log\LoggerInterface;
22
/**
 * Version of PoolCounter that uses Redis
 *
 * There are four main redis keys used to track each pool counter key:
 *   - poolcounter:l-slots-*     : A list of available slot IDs for a pool.
 *   - poolcounter:z-renewtime-* : A sorted set of (slot ID, UNIX timestamp as score)
 *                                 used for tracking the next time a slot should be
 *                                 released. This is -1 when a slot is created, and is
 *                                 set when released (expired), locked, and unlocked.
 *   - poolcounter:z-wait-*      : A sorted set of (session ID, UNIX timestamp as score)
 *                                 used for tracking waiting processes (and wait time).
 *   - poolcounter:l-wakeup-*    : A list pushed to for the sake of waking up processes
 *                                 when any process in the pool finishes (lasts for 1ms).
 * For a given pool key, all the redis keys start off non-existing and are deleted if not
 * used for a while to prevent garbage from building up on the server. They are atomically
 * re-initialized as needed. The "z-renewtime" key is used for detecting sessions which got
 * slots but then disappeared. Stale entries from there have their timestamp updated and the
 * corresponding slots freed up. The "z-wait" key is used for detecting processes registered
 * as waiting but that disappeared. Stale entries from there are deleted and the corresponding
 * slots are freed up. The worker count is included in all the redis key names as it does not
 * vary within each $wgPoolCounterConf type and doing so handles configuration changes.
 *
 * This class requires Redis 2.6 as it makes use of Lua scripts for fast atomic operations.
 * Also this should be on a server with plenty of RAM for the working set to avoid evictions.
 * Evictions could temporarily allow wait queues to double in size or temporarily cause
 * pools to appear as full when they are not. Using volatile-ttl and bumping memory-samples
 * in redis.conf can be helpful otherwise.
 *
 * @ingroup Redis
 * @since 1.23
 */
class PoolCounterRedis extends PoolCounter {
	/** @var HashRing */
	protected $ring;
	/** @var RedisConnectionPool */
	protected $pool;
	/** @var LoggerInterface */
	protected $logger;
	/** @var array (server label => host) map */
	protected $serversByLabel;
	/** @var string SHA-1 of the key */
	protected $keySha1;
	/** @var int TTL for locks to expire (work should finish in this time) */
	protected $lockTTL;

	/** @var RedisConnRef */
	protected $conn;
	/** @var string Pool slot value */
	protected $slot;
	/** @var int AWAKE_* constant */
	protected $onRelease;
	/** @var string Unique string to identify this process */
	protected $session;
	/** @var int UNIX timestamp */
	protected $slotTime;

	// Wake up only when an actual pool slot becomes available
	const AWAKE_ONE = 1;
	// Wake up when a slot becomes available or when a slot holder finishes; on
	// release, also wake up the other processes that are registered as waiting
	const AWAKE_ALL = 2;

	/** @var PoolCounterRedis[] List of active PoolCounterRedis objects in this script */
	protected static $active = null;

	/**
	 * @param array $conf Must contain 'servers' (a server label => host map) and
	 *   'redisConfig' (options handed to RedisConnectionPool::singleton())
	 * @param string $type
	 * @param string $key
	 */
	public function __construct( $conf, $type, $key ) {
		parent::__construct( $conf, $type, $key );

		$this->serversByLabel = $conf['servers'];
		// Every configured server gets the same weight on the hash ring
		$this->ring = new HashRing( array_fill_keys( array_keys( $conf['servers'] ), 100 ) );

		$conf['redisConfig']['serializer'] = 'none'; // for use with Lua
		$this->pool = RedisConnectionPool::singleton( $conf['redisConfig'] );
		$this->logger = \MediaWiki\Logger\LoggerFactory::getInstance( 'redis' );

		$this->keySha1 = sha1( $this->key );
		$met = (int)ini_get( 'max_execution_time' ); // usually 0 in CLI mode
		$this->lockTTL = $met ? 2 * $met : 3600;

		if ( $this->session === null ) {
			// The session ID is the member name used in the "z-wait" sorted set and the
			// key used in self::$active. It has to differ between instances; otherwise
			// all waiters collapse into one sorted set member and all active objects
			// overwrite each other in the registry used by releaseAll().
			$this->session = sha1(
				uniqid( gethostname() . ':' . getmypid() . ':' . mt_rand(), true )
			);
		}

		if ( self::$active === null ) {
			// First instance in this script: set up the registry and make sure
			// that any slots still held at shutdown get released
			self::$active = [];
			register_shutdown_function( [ __CLASS__, 'releaseAll' ] );
		}
	}

	/**
	 * Get a (cached) connection to one of the servers responsible for this key
	 *
	 * Up to three candidate servers are taken from the hash ring and tried in
	 * order; the first one that yields a connection is kept for later calls.
	 *
	 * @return Status Uses RedisConnRef as value on success
	 */
	protected function getConnection() {
		if ( !isset( $this->conn ) ) {
			$conn = false;
			$servers = $this->ring->getLocations( $this->key, 3 );
			ArrayUtils::consistentHashSort( $servers, $this->key );
			foreach ( $servers as $server ) {
				$conn = $this->pool->getConnection( $this->serversByLabel[$server], $this->logger );
				if ( $conn ) {
					break;
				}
			}
			if ( !$conn ) {
				return Status::newFatal( 'pool-servererror', implode( ', ', $servers ) );
			}
			$this->conn = $conn;
		}

		return Status::newGood( $this->conn );
	}

	/**
	 * Acquire a slot, waiting only for an actual pool slot if none is free
	 *
	 * @return Status Result of precheckAcquire() if it is not good; otherwise
	 *   the result of waitForSlotOrNotif()
	 */
	public function acquireForMe() {
		$status = $this->precheckAcquire();
		if ( !$status->isGood() ) {
			return $status;
		}

		return $this->waitForSlotOrNotif( self::AWAKE_ONE );
	}

	/**
	 * Acquire a slot, or wait until either a slot is free or a slot holder finishes
	 *
	 * @return Status Result of precheckAcquire() if it is not good; otherwise
	 *   the result of waitForSlotOrNotif()
	 */
	public function acquireForAnyone() {
		$status = $this->precheckAcquire();
		if ( !$status->isGood() ) {
			return $status;
		}

		return $this->waitForSlotOrNotif( self::AWAKE_ALL );
	}

	/**
	 * Give the held slot back to the pool and, if the slot was acquired with
	 * AWAKE_ALL, push one wake-up notification per registered waiter
	 *
	 * @return Status Good with PoolCounter::NOT_LOCKED if no slot is held, good with
	 *   PoolCounter::RELEASED on success, or fatal on connection/redis errors
	 */
	public function release() {
		if ( $this->slot === null ) {
			return Status::newGood( PoolCounter::NOT_LOCKED ); // not locked
		}

		$status = $this->getConnection();
		if ( !$status->isOK() ) {
			return $status;
		}
		$conn = $status->value;

		// @codingStandardsIgnoreStart Generic.Files.LineLength
		static $script =
<<<LUA
		local kSlots,kSlotsNextRelease,kWakeup,kWaiting = unpack(KEYS)
		local rMaxWorkers,rExpiry,rSlot,rSlotTime,rAwakeAll,rTime = unpack(ARGV)
		-- Add the slots back to the list (if rSlot is "w" then it is not a slot).
		-- Treat the list as expired if the "next release" time sorted-set is missing.
		if rSlot ~= 'w' and redis.call('exists',kSlotsNextRelease) == 1 then
			if 1*redis.call('zScore',kSlotsNextRelease,rSlot) ~= (rSlotTime + rExpiry) then
				-- Slot lock expired and was released already
			elseif redis.call('lLen',kSlots) >= 1*rMaxWorkers then
				-- Slots somehow got out of sync; reset the list for sanity
				redis.call('del',kSlots,kSlotsNextRelease)
			elseif redis.call('lLen',kSlots) == (1*rMaxWorkers - 1) and redis.call('zCard',kWaiting) == 0 then
				-- Slot list will be made full; clear it to save space (it re-inits as needed)
				-- since nothing is waiting on being unblocked by a push to the list
				redis.call('del',kSlots,kSlotsNextRelease)
			else
				-- Add slot back to pool and update the "next release" time
				redis.call('rPush',kSlots,rSlot)
				redis.call('zAdd',kSlotsNextRelease,rTime + 30,rSlot)
				-- Always keep renewing the expiry on use
				redis.call('expireAt',kSlots,math.ceil(rTime + rExpiry))
				redis.call('expireAt',kSlotsNextRelease,math.ceil(rTime + rExpiry))
			end
		end
		-- Update an ephemeral list to wake up other clients that can
		-- reuse any cached work from this process. Only do this if no
		-- slots are currently free (e.g. clients could be waiting).
		if 1*rAwakeAll == 1 then
			local count = redis.call('zCard',kWaiting)
			for i = 1,count do
				redis.call('rPush',kWakeup,'w')
			end
			redis.call('pexpire',kWakeup,1)
		end
		return 1
LUA;
		// @codingStandardsIgnoreEnd

		try {
			$conn->luaEval( $script,
				[
					$this->getSlotListKey(),
					$this->getSlotRTimeSetKey(),
					$this->getWakeupListKey(),
					$this->getWaitSetKey(),
					$this->workers,
					$this->lockTTL,
					$this->slot,
					$this->slotTime, // used for CAS-style sanity check
					( $this->onRelease === self::AWAKE_ALL ) ? 1 : 0,
					microtime( true )
				],
				4 # number of first argument(s) that are keys
			);
		} catch ( RedisException $e ) {
			return Status::newFatal( 'pool-error-unknown', $e->getMessage() );
		}

		// The slot is gone; forget it and drop this object from the shutdown registry
		$this->slot = null;
		$this->slotTime = null;
		$this->onRelease = null;
		unset( self::$active[$this->session] );

		// This is the onRelease() method, not the $onRelease property cleared above
		$this->onRelease();

		return Status::newGood( PoolCounter::RELEASED );
	}

	/**
	 * Take a free slot, or register as waiting and block until a slot (or, for
	 * AWAKE_ALL, a wake-up notification) arrives or the timeout is reached
	 *
	 * @param int $doWakeup AWAKE_* constant
	 * @return Status Good with one of PoolCounter::LOCK_HELD, QUEUE_FULL, TIMEOUT,
	 *   DONE (woken up without a slot) or LOCKED (slot acquired); fatal on
	 *   connection/redis errors or an unexpected server response
	 */
	protected function waitForSlotOrNotif( $doWakeup ) {
		if ( $this->slot !== null ) {
			return Status::newGood( PoolCounter::LOCK_HELD ); // already acquired
		}

		$status = $this->getConnection();
		if ( !$status->isOK() ) {
			return $status;
		}
		$conn = $status->value;

		$now = microtime( true );
		try {
			$slot = $this->initAndPopPoolSlotList( $conn, $now );
			if ( ctype_digit( $slot ) ) {
				// Pool slot acquired by this process
				$slotTime = $now;
			} elseif ( $slot === 'QUEUE_FULL' ) {
				// Too many processes are waiting for pooled processes to finish
				return Status::newGood( PoolCounter::QUEUE_FULL );
			} elseif ( $slot === 'QUEUE_WAIT' ) {
				// This process is now registered as waiting
				$keys = ( $doWakeup == self::AWAKE_ALL )
					// Wait for an open slot or wake-up signal (preferring the latter)
					? [ $this->getWakeupListKey(), $this->getSlotListKey() ]
					// Just wait for an actual pool slot
					: [ $this->getSlotListKey() ];

				$res = $conn->blPop( $keys, $this->timeout );
				if ( $res === [] ) {
					$conn->zRem( $this->getWaitSetKey(), $this->session ); // no longer waiting
					return Status::newGood( PoolCounter::TIMEOUT );
				}

				$slot = $res[1]; // pool slot or "w" for wake-up notifications
				$slotTime = microtime( true ); // last microtime() was a few RTTs ago
				// Unregister this process as waiting and bump slot "next release" time
				$this->registerAcquisitionTime( $conn, $slot, $slotTime );
			} else {
				return Status::newFatal( 'pool-error-unknown', "Server gave slot '$slot'." );
			}
		} catch ( RedisException $e ) {
			return Status::newFatal( 'pool-error-unknown', $e->getMessage() );
		}

		if ( $slot !== 'w' ) {
			// A real slot is held now; remember it so that release() and the
			// shutdown handler can hand it back
			$this->slot = $slot;
			$this->slotTime = $slotTime;
			$this->onRelease = $doWakeup;
			self::$active[$this->session] = $this;
		}

		$this->onAcquire();

		return Status::newGood( $slot === 'w' ? PoolCounter::DONE : PoolCounter::LOCKED );
	}

	/**
	 * Atomically (re-)initialize the slot list if needed, free up stale slots and
	 * wait entries, and then either pop a slot or register this session as waiting
	 *
	 * @param RedisConnRef $conn
	 * @param float $now UNIX timestamp
	 * @return string|bool Slot ID, "QUEUE_FULL" or "QUEUE_WAIT"; false on failure
	 */
	protected function initAndPopPoolSlotList( RedisConnRef $conn, $now ) {
		static $script =
<<<LUA
		local kSlots,kSlotsNextRelease,kSlotWaits = unpack(KEYS)
		local rMaxWorkers,rMaxQueue,rTimeout,rExpiry,rSess,rTime = unpack(ARGV)
		-- Initialize if the "next release" time sorted-set is empty. The slot key
		-- itself is empty if all slots are busy or when nothing is initialized.
		-- If the list is empty but the set is not, then it is the latter case.
		-- For sanity, if the list exists but not the set, then reset everything.
		if redis.call('exists',kSlotsNextRelease) == 0 then
			redis.call('del',kSlots)
			for i = 1,1*rMaxWorkers do
				redis.call('rPush',kSlots,i)
				redis.call('zAdd',kSlotsNextRelease,-1,i)
			end
		-- Otherwise do maintenance to clean up after network partitions
		else
			-- Find stale slot locks and add free them (avoid duplicates for sanity)
			local staleLocks = redis.call('zRangeByScore',kSlotsNextRelease,0,rTime)
			for k,slot in ipairs(staleLocks) do
				redis.call('lRem',kSlots,0,slot)
				redis.call('rPush',kSlots,slot)
				redis.call('zAdd',kSlotsNextRelease,rTime + 30,slot)
			end
			-- Find stale wait slot entries and remove them
			redis.call('zRemRangeByScore',kSlotWaits,0,rTime - 2*rTimeout)
		end
		local slot
		-- Try to acquire a slot if possible now
		if redis.call('lLen',kSlots) > 0 then
			slot = redis.call('lPop',kSlots)
			-- Update the slot "next release" time
			redis.call('zAdd',kSlotsNextRelease,rTime + rExpiry,slot)
		elseif redis.call('zCard',kSlotWaits) >= 1*rMaxQueue then
			slot = 'QUEUE_FULL'
		else
			slot = 'QUEUE_WAIT'
			-- Register this process as waiting
			redis.call('zAdd',kSlotWaits,rTime,rSess)
			redis.call('expireAt',kSlotWaits,math.ceil(rTime + 2*rTimeout))
		end
		-- Always keep renewing the expiry on use
		redis.call('expireAt',kSlots,math.ceil(rTime + rExpiry))
		redis.call('expireAt',kSlotsNextRelease,math.ceil(rTime + rExpiry))
		return slot
LUA;
		return $conn->luaEval( $script,
			[
				$this->getSlotListKey(),
				$this->getSlotRTimeSetKey(),
				$this->getWaitSetKey(),
				$this->workers,
				$this->maxqueue,
				$this->timeout,
				$this->lockTTL,
				$this->session,
				$now
			],
			3 # number of first argument(s) that are keys
		);
	}

	/**
	 * Unregister this session as waiting and, if a real slot was obtained (rather
	 * than a "w" wake-up notification), set that slot's "next release" time
	 *
	 * @param RedisConnRef $conn
	 * @param string $slot Slot ID, or "w" when only a wake-up notification was received
	 * @param float $now UNIX timestamp
	 * @return int|bool False on failure
	 */
	protected function registerAcquisitionTime( RedisConnRef $conn, $slot, $now ) {
		static $script =
<<<LUA
		local kSlots,kSlotsNextRelease,kSlotWaits = unpack(KEYS)
		local rSlot,rExpiry,rSess,rTime = unpack(ARGV)
		-- If rSlot is 'w' then the client was told to wake up but got no slot
		if rSlot ~= 'w' then
			-- Update the slot "next release" time
			redis.call('zAdd',kSlotsNextRelease,rTime + rExpiry,rSlot)
			-- Always keep renewing the expiry on use
			redis.call('expireAt',kSlots,math.ceil(rTime + rExpiry))
			redis.call('expireAt',kSlotsNextRelease,math.ceil(rTime + rExpiry))
		end
		-- Unregister this process as waiting
		redis.call('zRem',kSlotWaits,rSess)
		return 1
LUA;
		return $conn->luaEval( $script,
			[
				$this->getSlotListKey(),
				$this->getSlotRTimeSetKey(),
				$this->getWaitSetKey(),
				$slot,
				$this->lockTTL,
				$this->session,
				$now
			],
			3 # number of first argument(s) that are keys
		);
	}

	/**
	 * @return string Redis key of the list of available slot IDs
	 */
	protected function getSlotListKey() {
		return "poolcounter:l-slots-{$this->keySha1}-{$this->workers}";
	}

	/**
	 * @return string Redis key of the sorted set of slot "next release" times
	 */
	protected function getSlotRTimeSetKey() {
		return "poolcounter:z-renewtime-{$this->keySha1}-{$this->workers}";
	}

	/**
	 * @return string Redis key of the sorted set of waiting sessions
	 */
	protected function getWaitSetKey() {
		return "poolcounter:z-wait-{$this->keySha1}-{$this->workers}";
	}

	/**
	 * @return string Redis key of the ephemeral wake-up notification list
	 */
	protected function getWakeupListKey() {
		return "poolcounter:l-wakeup-{$this->keySha1}-{$this->workers}";
	}

	/**
	 * Try to make sure that locks get released (even with exceptions and fatals)
	 *
	 * Registered as a shutdown function by the first instance that gets constructed.
	 */
	public static function releaseAll() {
		if ( !self::$active ) {
			// The registry is null until the first instance is constructed
			return;
		}

		foreach ( self::$active as $poolCounter ) {
			try {
				if ( $poolCounter->slot !== null ) {
					$poolCounter->release();
				}
			} catch ( Exception $e ) {
				// Best effort only: one failed release must not keep the
				// remaining counters from being released during shutdown
			}
		}
	}
}