X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Flibs%2FHashRing.php;h=251fa88bae0f10d3a5f5d41f4e532a4617771078;hb=d503ac7c9433a36358b1db27c6365167ea869832;hp=3b9c24d978ab133119b316d1470490bd33d24e03;hpb=95be23e0fefa0e2707d4fd9d768e8c09a7c6e2a4;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/libs/HashRing.php b/includes/libs/HashRing.php index 3b9c24d978..251fa88bae 100644 --- a/includes/libs/HashRing.php +++ b/includes/libs/HashRing.php @@ -23,58 +23,80 @@ /** * Convenience class for weighted consistent hash rings * + * This deterministically maps "keys" to a set of "locations" while avoiding clumping + * + * Each location is represented by a number of nodes on a ring proportionate to the ratio + * of its weight compared to the total location weight. Note positions are deterministically + * derived from the hash of the location name. Nodes are responsible for the portion of the + * ring, counter-clockwise, up until the next node. Locations are responsible for all portions + * of the ring that the location's nodes are responsible for. + * + * A location that is temporarily "ejected" is said to be absent from the "live" ring. + * If no location ejections are active, then the base ring and live ring are identical. + * * @since 1.22 */ -class HashRing { - /** @var array (location => weight) */ - protected $sourceMap = []; - /** @var array (location => (start, end)) */ - protected $ring = []; +class HashRing implements Serializable { + /** @var string Hashing algorithm for hash() */ + protected $algo; + /** @var int[] Non-empty (location => integer weight) */ + protected $weightByLocation; + /** @var int[] Map of (location => UNIX timestamp) */ + protected $ejectExpiryByLocation; - /** @var HashRing|null */ + /** @var array[] Non-empty list of (float, node name, location name) */ + protected $baseRing; + /** @var array[] Non-empty list of (float, node name, location name) */ protected $liveRing; - /** @var array (location => UNIX timestamp) */ - protected $ejectionExpiries = []; - /** @var int UNIX timestamp */ - protected $ejectionNextExpiry = INF; - const RING_SIZE = 268435456; // 2^28 + /** @var float Number of positions on the ring */ + const RING_SIZE = 4294967296.0; // 2^32 + /** @var integer Overall number of node groups per server */ + const HASHES_PER_LOCATION = 40; + /** @var integer Number of nodes in a node group */ + const SECTORS_PER_HASH = 4; + + const KEY_POS = 0; + const KEY_LOCATION = 1; + + /** @var int Consider all locations */ + const RING_ALL = 0; + /** @var int Only consider "live" locations */ + const RING_LIVE = 1; /** - * @param array $map (location => weight) + * Make a consistent hash ring given a set of locations and their weight values + * + * @param int[] $map Map of (location => weight) + * @param string $algo Hashing algorithm listed in hash_algos() [optional] + * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries + * @since 1.31 */ - public function __construct( array $map ) { - $map = array_filter( $map, function ( $w ) { - return $w > 0; - } ); - if ( !count( $map ) ) { - throw new UnexpectedValueException( "Ring is empty or all weights are zero." ); - } - $this->sourceMap = $map; - // Sort the locations based on the hash of their names - $hashes = []; - foreach ( $map as $location => $weight ) { - $hashes[$location] = sha1( $location ); - } - uksort( $map, function ( $a, $b ) use ( $hashes ) { - return strcmp( $hashes[$a], $hashes[$b] ); - } ); - // Fit the map to weight-proportionate one with a space of size RING_SIZE - $sum = array_sum( $map ); - $standardMap = []; - foreach ( $map as $location => $weight ) { - $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE ); + public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) { + $this->init( $map, $algo, $ejections ); + } + + /** + * @param int[] $map Map of (location => integer) + * @param string $algo Hashing algorithm + * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires + */ + protected function init( array $map, $algo, array $ejections ) { + if ( !in_array( $algo, hash_algos(), true ) ) { + throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." ); } - // Build a ring of RING_SIZE spots, with each location at a spot in location hash order - $index = 0; - foreach ( $standardMap as $location => $weight ) { - // Location covers half-closed interval [$index,$index + $weight) - $this->ring[$location] = [ $index, $index + $weight ]; - $index += $weight; + + $weightByLocation = array_filter( $map ); + if ( $weightByLocation === [] ) { + throw new UnexpectedValueException( "No locations with non-zero weight." ); + } elseif ( min( $map ) < 0 ) { + throw new InvalidArgumentException( "Location weight cannot be negative." ); } - // Make sure the last location covers what is left - end( $this->ring ); - $this->ring[key( $this->ring )][1] = self::RING_SIZE; + + $this->algo = $algo; + $this->weightByLocation = $weightByLocation; + $this->ejectExpiryByLocation = $ejections; + $this->baseRing = $this->buildLocationRing( $this->weightByLocation, $this->algo ); } /** @@ -82,11 +104,10 @@ class HashRing { * * @param string $item * @return string Location + * @throws UnexpectedValueException */ final public function getLocation( $item ) { - $locations = $this->getLocations( $item, 1 ); - - return $locations[0]; + return $this->getLocations( $item, 1 )[0]; } /** @@ -94,46 +115,83 @@ class HashRing { * * @param string $item * @param int $limit Maximum number of locations to return - * @return array List of locations + * @param int $from One of the RING_* class constants + * @return string[] List of locations + * @throws UnexpectedValueException */ - public function getLocations( $item, $limit ) { - $locations = []; - $primaryLocation = null; - $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits - foreach ( $this->ring as $location => $range ) { - if ( count( $locations ) >= $limit ) { - break; - } - // The $primaryLocation is the location the item spot is in. - // After that is reached, keep appending the next locations. - if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) { - if ( $primaryLocation === null ) { - $primaryLocation = $location; - } - $locations[] = $location; - } + public function getLocations( $item, $limit, $from = self::RING_ALL ) { + if ( $from === self::RING_ALL ) { + $ring = $this->baseRing; + } elseif ( $from === self::RING_LIVE ) { + $ring = $this->getLiveRing(); + } else { + throw new InvalidArgumentException( "Invalid ring source specified." ); } - // If more locations are requested, wrap-around and keep adding them - reset( $this->ring ); + + // Locate this item's position on the hash ring + $position = $this->getItemPosition( $item ); + $itemNodeIndex = $this->findNodeIndexForPosition( $position, $ring ); + + $locations = []; + $currentIndex = $itemNodeIndex; while ( count( $locations ) < $limit ) { - $location = key( $this->ring ); - if ( $location === $primaryLocation ) { - break; // don't go in circles + $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION]; + if ( !in_array( $nodeLocation, $locations, true ) ) { + // Ignore other nodes for the same locations already added + $locations[] = $nodeLocation; + } + $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring ); + if ( $currentIndex === $itemNodeIndex ) { + break; // all nodes visited } - $locations[] = $location; - next( $this->ring ); } return $locations; } /** - * Get the map of locations to weight (ignores 0-weight items) + * @param float $position + * @param array[] $ring Either the base or live ring + * @return int|null + */ + private function findNodeIndexForPosition( $position, $ring ) { + $count = count( $ring ); + if ( $count === 0 ) { + return null; + } + $lowPos = 0; + $highPos = $count; + while ( true ) { + $midPos = intval( ( $lowPos + $highPos ) / 2 ); + if ( $midPos === $count ) { + return 0; + } + $midVal = $ring[$midPos][self::KEY_POS]; + $midMinusOneVal = $midPos === 0 ? 0 : $ring[$midPos - 1][self::KEY_POS]; + + if ( $position <= $midVal && $position > $midMinusOneVal ) { + return $midPos; + } + + if ( $midVal < $position ) { + $lowPos = $midPos + 1; + } else { + $highPos = $midPos - 1; + } + + if ( $lowPos > $highPos ) { + return 0; + } + } + } + + /** + * Get the map of locations to weight (does not include zero weight items) * - * @return array + * @return int[] */ public function getLocationWeights() { - return $this->sourceMap; + return $this->weightByLocation; } /** @@ -142,53 +200,19 @@ class HashRing { * @param string $location * @param int $ttl Seconds * @return bool Whether some non-ejected locations are left + * @throws UnexpectedValueException */ public function ejectFromLiveRing( $location, $ttl ) { - if ( !isset( $this->sourceMap[$location] ) ) { + if ( !isset( $this->weightByLocation[$location] ) ) { throw new UnexpectedValueException( "No location '$location' in the ring." ); } - $expiry = time() + $ttl; - $this->liveRing = null; // stale - $this->ejectionExpiries[$location] = $expiry; - $this->ejectionNextExpiry = min( $expiry, $this->ejectionNextExpiry ); - return ( count( $this->ejectionExpiries ) < count( $this->sourceMap ) ); - } + $expiry = $this->getCurrentTime() + $ttl; + $this->ejectExpiryByLocation[$location] = $expiry; - /** - * Get the "live" hash ring (which does not include ejected locations) - * - * @return HashRing - * @throws UnexpectedValueException - */ - protected function getLiveRing() { - $now = time(); - if ( $this->liveRing === null || $this->ejectionNextExpiry <= $now ) { - $this->ejectionExpiries = array_filter( - $this->ejectionExpiries, - function ( $expiry ) use ( $now ) { - return ( $expiry > $now ); - } - ); - if ( count( $this->ejectionExpiries ) ) { - $map = array_diff_key( $this->sourceMap, $this->ejectionExpiries ); - $this->liveRing = count( $map ) ? new self( $map ) : false; - - $this->ejectionNextExpiry = min( $this->ejectionExpiries ); - } else { // common case; avoid recalculating ring - $this->liveRing = clone $this; - $this->liveRing->ejectionExpiries = []; - $this->liveRing->ejectionNextExpiry = INF; - $this->liveRing->liveRing = null; - - $this->ejectionNextExpiry = INF; - } - } - if ( !$this->liveRing ) { - throw new UnexpectedValueException( "The live ring is currently empty." ); - } + $this->liveRing = null; // invalidate ring cache - return $this->liveRing; + return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) ); } /** @@ -198,8 +222,8 @@ class HashRing { * @return string Location * @throws UnexpectedValueException */ - public function getLiveLocation( $item ) { - return $this->getLiveRing()->getLocation( $item ); + final public function getLiveLocation( $item ) { + return $this->getLocations( $item, 1, self::RING_LIVE )[0]; } /** @@ -207,20 +231,200 @@ class HashRing { * * @param string $item * @param int $limit Maximum number of locations to return - * @return array List of locations + * @return string[] List of locations * @throws UnexpectedValueException */ - public function getLiveLocations( $item, $limit ) { - return $this->getLiveRing()->getLocations( $item, $limit ); + final public function getLiveLocations( $item, $limit ) { + return $this->getLocations( $item, $limit, self::RING_LIVE ); } /** - * Get the map of "live" locations to weight (ignores 0-weight items) + * Get the map of "live" locations to weight (does not include zero weight items) * - * @return array + * @return int[] * @throws UnexpectedValueException */ public function getLiveLocationWeights() { - return $this->getLiveRing()->getLocationWeights(); + $now = $this->getCurrentTime(); + + return array_diff_key( + $this->weightByLocation, + array_filter( + $this->ejectExpiryByLocation, + function ( $expiry ) use ( $now ) { + return ( $expiry > $now ); + } + ) + ); + } + + /** + * @param int[] $weightByLocation + * @param string $algo Hashing algorithm + * @return array[] + */ + private function buildLocationRing( array $weightByLocation, $algo ) { + $locationCount = count( $weightByLocation ); + $totalWeight = array_sum( $weightByLocation ); + + $ring = []; + // Assign nodes to all locations based on location weight + $claimed = []; // (position as string => (node, index)) + foreach ( $weightByLocation as $location => $weight ) { + $ratio = $weight / $totalWeight; + // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available; + // assign a few groups of nodes to this location based on its weight. + $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount ); + for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) { + // For efficiency, get 4 points per hash call and 4X node count. + // If $algo is MD5, then this matches that of with libketama. + // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c + $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" ); + foreach ( $positions as $gi => $position ) { + $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location"; + $posKey = (string)$position; // large integer + if ( isset( $claimed[$posKey] ) ) { + // Disallow duplicates for sanity (name decides precedence) + if ( $claimed[$posKey]['node'] > $node ) { + continue; + } else { + unset( $ring[$claimed[$posKey]['index']] ); + } + } + $ring[] = [ + self::KEY_POS => $position, + self::KEY_LOCATION => $location + ]; + $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ]; + } + } + } + // Sort the locations into clockwise order based on the hash ring position + usort( $ring, function ( $a, $b ) { + if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) { + throw new UnexpectedValueException( 'Duplicate node positions.' ); + } + + return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 ); + } ); + + return $ring; + } + + /** + * @param string $item Key + * @return float Ring position; integral number in [0, self::RING_SIZE - 1] + */ + private function getItemPosition( $item ) { + // If $algo is MD5, then this matches that of with libketama. + // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c + $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 ); + if ( strlen( $octets ) != 4 ) { + throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." ); + } + + return (float)sprintf( '%u', unpack( 'V', $octets )[1] ); + } + + /** + * @param string $nodeGroupName + * @return float[] Four ring positions on [0, self::RING_SIZE - 1] + */ + private function getNodePositionQuartet( $nodeGroupName ) { + $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 ); + if ( strlen( $octets ) != 16 ) { + throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." ); + } + + $positions = []; + foreach ( unpack( 'V4', $octets ) as $signed ) { + $positions[] = (float)sprintf( '%u', $signed ); + } + + return $positions; + } + + /** + * @param int $i Valid index for a node in the ring + * @param array[] $ring Either the base or live ring + * @return int Valid index for a node in the ring + */ + private function getNextClockwiseNodeIndex( $i, $ring ) { + if ( !isset( $ring[$i] ) ) { + throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." ); + } + + $next = $i + 1; + + return ( $next < count( $ring ) ) ? $next : 0; + } + + /** + * Get the "live" hash ring (which does not include ejected locations) + * + * @return array[] + * @throws UnexpectedValueException + */ + protected function getLiveRing() { + if ( !$this->ejectExpiryByLocation ) { + return $this->baseRing; // nothing ejected + } + + $now = $this->getCurrentTime(); + + if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) { + // Live ring needs to be regerenated... + $this->ejectExpiryByLocation = array_filter( + $this->ejectExpiryByLocation, + function ( $expiry ) use ( $now ) { + return ( $expiry > $now ); + } + ); + + if ( count( $this->ejectExpiryByLocation ) ) { + // Some locations are still ejected from the ring + $liveRing = []; + foreach ( $this->baseRing as $i => $nodeInfo ) { + $location = $nodeInfo[self::KEY_LOCATION]; + if ( !isset( $this->ejectExpiryByLocation[$location] ) ) { + $liveRing[] = $nodeInfo; + } + } + } else { + $liveRing = $this->baseRing; + } + + $this->liveRing = $liveRing; + } + + if ( !$this->liveRing ) { + throw new UnexpectedValueException( "The live ring is currently empty." ); + } + + return $this->liveRing; + } + + /** + * @return int UNIX timestamp + */ + protected function getCurrentTime() { + return time(); + } + + public function serialize() { + return serialize( [ + 'algorithm' => $this->algo, + 'locations' => $this->weightByLocation, + 'ejections' => $this->ejectExpiryByLocation + ] ); + } + + public function unserialize( $serialized ) { + $data = unserialize( $serialized ); + if ( is_array( $data ) ) { + $this->init( $data['locations'], $data['algorithm'], $data['ejections'] ); + } else { + throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." ); + } } }