Merge "ContribsPage: Re-remove the getContribs() method"
[lhc/web/wiklou.git] / includes / libs / HashRing.php
1 <?php
2 /**
3 * Convenience class for weighted consistent hash rings.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 /**
24 * Convenience class for weighted consistent hash rings
25 *
26 * This deterministically maps "keys" to a set of "locations" while avoiding clumping
27 *
28 * Each location is represented by a number of nodes on a ring proportionate to the ratio
29 * of its weight compared to the total location weight. Note positions are deterministically
30 * derived from the hash of the location name. Nodes are responsible for the portion of the
31 * ring, counter-clockwise, up until the next node. Locations are responsible for all portions
32 * of the ring that the location's nodes are responsible for.
33 *
34 * A location that is temporarily "ejected" is said to be absent from the "live" ring.
35 * If no location ejections are active, then the base ring and live ring are identical.
36 *
37 * @since 1.22
38 */
39 class HashRing implements Serializable {
40 /** @var string Hashing algorithm for hash() */
41 protected $algo;
42 /** @var int[] Non-empty (location => integer weight) */
43 protected $weightByLocation;
44 /** @var int[] Map of (location => UNIX timestamp) */
45 protected $ejectExpiryByLocation;
46
47 /** @var array[] Non-empty position-ordered list of (position, location name) */
48 protected $baseRing;
49 /** @var array[] Non-empty position-ordered list of (position, location name) */
50 protected $liveRing;
51
52 /** @var float Number of positions on the ring */
53 const RING_SIZE = 4294967296.0; // 2^32
54 /** @var integer Overall number of node groups per server */
55 const HASHES_PER_LOCATION = 40;
56 /** @var integer Number of nodes in a node group */
57 const SECTORS_PER_HASH = 4;
58
59 const KEY_POS = 0;
60 const KEY_LOCATION = 1;
61
62 /** @var int Consider all locations */
63 const RING_ALL = 0;
64 /** @var int Only consider "live" locations */
65 const RING_LIVE = 1;
66
67 /**
68 * Make a consistent hash ring given a set of locations and their weight values
69 *
70 * @param int[] $map Map of (location => weight)
71 * @param string $algo Hashing algorithm listed in hash_algos() [optional]
72 * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries
73 * @since 1.31
74 */
75 public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) {
76 $this->init( $map, $algo, $ejections );
77 }
78
79 /**
80 * @param int[] $map Map of (location => integer)
81 * @param string $algo Hashing algorithm
82 * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires
83 */
84 protected function init( array $map, $algo, array $ejections ) {
85 if ( !in_array( $algo, hash_algos(), true ) ) {
86 throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." );
87 }
88
89 $weightByLocation = array_filter( $map );
90 if ( $weightByLocation === [] ) {
91 throw new UnexpectedValueException( "No locations with non-zero weight." );
92 } elseif ( min( $map ) < 0 ) {
93 throw new InvalidArgumentException( "Location weight cannot be negative." );
94 }
95
96 $this->algo = $algo;
97 $this->weightByLocation = $weightByLocation;
98 $this->ejectExpiryByLocation = $ejections;
99 $this->baseRing = $this->buildLocationRing( $this->weightByLocation );
100 }
101
102 /**
103 * Get the location of an item on the ring
104 *
105 * @param string $item
106 * @return string Location
107 * @throws UnexpectedValueException
108 */
109 final public function getLocation( $item ) {
110 return $this->getLocations( $item, 1 )[0];
111 }
112
113 /**
114 * Get the location of an item on the ring followed by the next ring locations
115 *
116 * @param string $item
117 * @param int $limit Maximum number of locations to return
118 * @param int $from One of the RING_* class constants
119 * @return string[] List of locations
120 * @throws InvalidArgumentException
121 * @throws UnexpectedValueException
122 */
123 public function getLocations( $item, $limit, $from = self::RING_ALL ) {
124 if ( $from === self::RING_ALL ) {
125 $ring = $this->baseRing;
126 } elseif ( $from === self::RING_LIVE ) {
127 $ring = $this->getLiveRing();
128 } else {
129 throw new InvalidArgumentException( "Invalid ring source specified." );
130 }
131
132 // Locate the node index for this item's position on the hash ring
133 $itemIndex = $this->findNodeIndexForPosition( $this->getItemPosition( $item ), $ring );
134
135 $locations = [];
136 $currentIndex = null;
137 while ( count( $locations ) < $limit ) {
138 if ( $currentIndex === null ) {
139 $currentIndex = $itemIndex;
140 } else {
141 $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring );
142 if ( $currentIndex === $itemIndex ) {
143 break; // all nodes visited
144 }
145 }
146 $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION];
147 if ( !in_array( $nodeLocation, $locations, true ) ) {
148 // Ignore other nodes for the same locations already added
149 $locations[] = $nodeLocation;
150 }
151 }
152
153 return $locations;
154 }
155
156 /**
157 * @param float $position
158 * @param array[] $ring Either the base or live ring
159 * @return int|null
160 */
161 private function findNodeIndexForPosition( $position, $ring ) {
162 $count = count( $ring );
163 if ( $count === 0 ) {
164 return null;
165 }
166
167 $index = null;
168 $lowPos = 0;
169 $highPos = $count;
170 while ( true ) {
171 $midPos = (int)( ( $lowPos + $highPos ) / 2 );
172 if ( $midPos === $count ) {
173 $index = 0;
174 break;
175 }
176
177 $midVal = $ring[$midPos][self::KEY_POS];
178 $midMinusOneVal = ( $midPos === 0 ) ? 0 : $ring[$midPos - 1][self::KEY_POS];
179 if ( $position <= $midVal && $position > $midMinusOneVal ) {
180 $index = $midPos;
181 break;
182 }
183
184 if ( $midVal < $position ) {
185 $lowPos = $midPos + 1;
186 } else {
187 $highPos = $midPos - 1;
188 }
189
190 if ( $lowPos > $highPos ) {
191 $index = 0;
192 break;
193 }
194 }
195
196 return $index;
197 }
198
199 /**
200 * Get the map of locations to weight (does not include zero weight items)
201 *
202 * @return int[]
203 */
204 public function getLocationWeights() {
205 return $this->weightByLocation;
206 }
207
208 /**
209 * Remove a location from the "live" hash ring
210 *
211 * @param string $location
212 * @param int $ttl Seconds
213 * @return bool Whether some non-ejected locations are left
214 * @throws UnexpectedValueException
215 */
216 public function ejectFromLiveRing( $location, $ttl ) {
217 if ( !isset( $this->weightByLocation[$location] ) ) {
218 throw new UnexpectedValueException( "No location '$location' in the ring." );
219 }
220
221 $expiry = $this->getCurrentTime() + $ttl;
222 $this->ejectExpiryByLocation[$location] = $expiry;
223
224 $this->liveRing = null; // invalidate ring cache
225
226 return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) );
227 }
228
229 /**
230 * Get the location of an item on the "live" ring
231 *
232 * @param string $item
233 * @return string Location
234 * @throws UnexpectedValueException
235 */
236 final public function getLiveLocation( $item ) {
237 return $this->getLocations( $item, 1, self::RING_LIVE )[0];
238 }
239
240 /**
241 * Get the location of an item on the "live" ring, as well as the next locations
242 *
243 * @param string $item
244 * @param int $limit Maximum number of locations to return
245 * @return string[] List of locations
246 * @throws UnexpectedValueException
247 */
248 final public function getLiveLocations( $item, $limit ) {
249 return $this->getLocations( $item, $limit, self::RING_LIVE );
250 }
251
252 /**
253 * Get the map of "live" locations to weight (does not include zero weight items)
254 *
255 * @return int[]
256 * @throws UnexpectedValueException
257 */
258 public function getLiveLocationWeights() {
259 $now = $this->getCurrentTime();
260
261 return array_diff_key(
262 $this->weightByLocation,
263 array_filter(
264 $this->ejectExpiryByLocation,
265 function ( $expiry ) use ( $now ) {
266 return ( $expiry > $now );
267 }
268 )
269 );
270 }
271
272 /**
273 * @param int[] $weightByLocation
274 * @return array[]
275 */
276 private function buildLocationRing( array $weightByLocation ) {
277 $locationCount = count( $weightByLocation );
278 $totalWeight = array_sum( $weightByLocation );
279
280 $ring = [];
281 // Assign nodes to all locations based on location weight
282 $claimed = []; // (position as string => (node, index))
283 foreach ( $weightByLocation as $location => $weight ) {
284 $ratio = $weight / $totalWeight;
285 // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available;
286 // assign a few groups of nodes to this location based on its weight.
287 $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount );
288 for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) {
289 // For efficiency, get 4 points per hash call and 4X node count.
290 // If $algo is MD5, then this matches that of with libketama.
291 // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
292 $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" );
293 foreach ( $positions as $gi => $position ) {
294 $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location";
295 $posKey = (string)$position; // large integer
296 if ( isset( $claimed[$posKey] ) ) {
297 // Disallow duplicates for sanity (name decides precedence)
298 if ( $claimed[$posKey]['node'] > $node ) {
299 continue;
300 } else {
301 unset( $ring[$claimed[$posKey]['index']] );
302 }
303 }
304 $ring[] = [
305 self::KEY_POS => $position,
306 self::KEY_LOCATION => $location
307 ];
308 $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ];
309 }
310 }
311 }
312 // Sort the locations into clockwise order based on the hash ring position
313 usort( $ring, function ( $a, $b ) {
314 if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) {
315 throw new UnexpectedValueException( 'Duplicate node positions.' );
316 }
317
318 return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 );
319 } );
320
321 return $ring;
322 }
323
324 /**
325 * @param string $item Key
326 * @return float Ring position; integral number in [0, self::RING_SIZE - 1]
327 */
328 private function getItemPosition( $item ) {
329 // If $algo is MD5, then this matches that of with libketama.
330 // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
331 $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 );
332 if ( strlen( $octets ) != 4 ) {
333 throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." );
334 }
335
336 $pos = unpack( 'V', $octets )[1];
337 if ( $pos < 0 ) {
338 // Most-significant-bit is set, causing unpack() to return a negative integer due
339 // to the fact that it returns a signed int. Cast it to an unsigned integer string.
340 $pos = sprintf( '%u', $pos );
341 }
342
343 return (float)$pos;
344 }
345
346 /**
347 * @param string $nodeGroupName
348 * @return float[] Four ring positions on [0, self::RING_SIZE - 1]
349 */
350 private function getNodePositionQuartet( $nodeGroupName ) {
351 $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 );
352 if ( strlen( $octets ) != 16 ) {
353 throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." );
354 }
355
356 $positions = [];
357 foreach ( unpack( 'V4', $octets ) as $signed ) {
358 $positions[] = (float)sprintf( '%u', $signed );
359 }
360
361 return $positions;
362 }
363
364 /**
365 * @param int $i Valid index for a node in the ring
366 * @param array[] $ring Either the base or live ring
367 * @return int Valid index for a node in the ring
368 */
369 private function getNextClockwiseNodeIndex( $i, $ring ) {
370 if ( !isset( $ring[$i] ) ) {
371 throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
372 }
373
374 $next = $i + 1;
375
376 return ( $next < count( $ring ) ) ? $next : 0;
377 }
378
379 /**
380 * Get the "live" hash ring (which does not include ejected locations)
381 *
382 * @return array[]
383 * @throws UnexpectedValueException
384 */
385 protected function getLiveRing() {
386 if ( !$this->ejectExpiryByLocation ) {
387 return $this->baseRing; // nothing ejected
388 }
389
390 $now = $this->getCurrentTime();
391
392 if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) {
393 // Live ring needs to be regerenated...
394 $this->ejectExpiryByLocation = array_filter(
395 $this->ejectExpiryByLocation,
396 function ( $expiry ) use ( $now ) {
397 return ( $expiry > $now );
398 }
399 );
400
401 if ( count( $this->ejectExpiryByLocation ) ) {
402 // Some locations are still ejected from the ring
403 $liveRing = [];
404 foreach ( $this->baseRing as $i => $nodeInfo ) {
405 $location = $nodeInfo[self::KEY_LOCATION];
406 if ( !isset( $this->ejectExpiryByLocation[$location] ) ) {
407 $liveRing[] = $nodeInfo;
408 }
409 }
410 } else {
411 $liveRing = $this->baseRing;
412 }
413
414 $this->liveRing = $liveRing;
415 }
416
417 if ( !$this->liveRing ) {
418 throw new UnexpectedValueException( "The live ring is currently empty." );
419 }
420
421 return $this->liveRing;
422 }
423
424 /**
425 * @return int UNIX timestamp
426 */
427 protected function getCurrentTime() {
428 return time();
429 }
430
431 public function serialize() {
432 return serialize( [
433 'algorithm' => $this->algo,
434 'locations' => $this->weightByLocation,
435 'ejections' => $this->ejectExpiryByLocation
436 ] );
437 }
438
439 public function unserialize( $serialized ) {
440 $data = unserialize( $serialized );
441 if ( is_array( $data ) ) {
442 $this->init( $data['locations'], $data['algorithm'], $data['ejections'] );
443 } else {
444 throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." );
445 }
446 }
447 }