From: Aaron Schulz Date: Thu, 8 Mar 2018 03:03:32 +0000 (-0800) Subject: Rewrite HashRing to use consistent hashing X-Git-Tag: 1.34.0-rc.0~5154^2 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=f2e6543c46fff5ce43839982ecd56ebcc10f4d62 Rewrite HashRing to use consistent hashing This scheme avoids disruptive re-hashing when a node is ejected; only the adjectent clockwise sector is affected, the later taking responsibility for the former. Addition of new nodes is likewise less disruptive. When used with MD5, this maps keys the same as libketama. Also: * Make HashRing::getLocations() faster by guessing the sector list index and moving clockwise/counter-clockwise. * Made HashRing Serializable for more robust storage. * Allow for different hash functions to be specified. Note: * This makes PoolCounterRedis use the new hash scheme. * Likewise for JobQueueFederated (for job de-duplication). Switching from one mode to the other will cause some of duplicated effort from old jobs only temporarily. Change-Id: Iff432a4e5dc3fb40b2dda70fb3dbe9dda0b74933 --- diff --git a/includes/libs/HashRing.php b/includes/libs/HashRing.php index 3b9c24d978..501cbcdb59 100644 --- a/includes/libs/HashRing.php +++ b/includes/libs/HashRing.php @@ -23,58 +23,83 @@ /** * Convenience class for weighted consistent hash rings * + * This deterministically maps "keys" to a set of "locations" while avoiding clumping + * + * Each location is represented by a number of nodes on a ring proportionate to the ratio + * of its weight compared to the total location weight. Note positions are deterministically + * derived from the hash of the location name. Nodes are responsible for the portion of the + * ring, counter-clockwise, up until the next node. Locations are responsible for all portions + * of the ring that the location's nodes are responsible for. + * + * A location that is temporarily "ejected" is said to be absent from the "live" ring. + * If no location ejections are active, then the base ring and live ring are identical. + * * @since 1.22 */ -class HashRing { - /** @var array (location => weight) */ - protected $sourceMap = []; - /** @var array (location => (start, end)) */ - protected $ring = []; +class HashRing implements Serializable { + /** @var string Hashing algorithm for hash() */ + protected $algo; + /** @var int[] Non-empty (location => integer weight) */ + protected $weightByLocation; + /** @var int[] Map of (location => UNIX timestamp) */ + protected $ejectExpiryByLocation; - /** @var HashRing|null */ + /** @var array[] Non-empty list of (float, node name, location name) */ + protected $baseRing; + /** @var array[] Non-empty list of (float, node name, location name) */ protected $liveRing; - /** @var array (location => UNIX timestamp) */ - protected $ejectionExpiries = []; - /** @var int UNIX timestamp */ - protected $ejectionNextExpiry = INF; - const RING_SIZE = 268435456; // 2^28 + /** @var int|null Number of nodes scanned to place an item last time */ + private $lastNodeScanSize; + + /** @var float Number of positions on the ring */ + const RING_SIZE = 4294967296.0; // 2^32 + /** @var integer Overall number of node groups per server */ + const HASHES_PER_LOCATION = 40; + /** @var integer Number of nodes in a node group */ + const SECTORS_PER_HASH = 4; + + const KEY_POS = 0; + const KEY_LOCATION = 1; + + /** @var int Consider all locations */ + const RING_ALL = 0; + /** @var int Only consider "live" locations */ + const RING_LIVE = 1; /** - * @param array $map (location => weight) + * Make a consistent hash ring given a set of locations and their weight values + * + * @param int[] $map Map of (location => weight) + * @param string $algo Hashing algorithm listed in hash_algos() [optional] + * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries + * @since 1.31 */ - public function __construct( array $map ) { - $map = array_filter( $map, function ( $w ) { - return $w > 0; - } ); - if ( !count( $map ) ) { - throw new UnexpectedValueException( "Ring is empty or all weights are zero." ); - } - $this->sourceMap = $map; - // Sort the locations based on the hash of their names - $hashes = []; - foreach ( $map as $location => $weight ) { - $hashes[$location] = sha1( $location ); - } - uksort( $map, function ( $a, $b ) use ( $hashes ) { - return strcmp( $hashes[$a], $hashes[$b] ); - } ); - // Fit the map to weight-proportionate one with a space of size RING_SIZE - $sum = array_sum( $map ); - $standardMap = []; - foreach ( $map as $location => $weight ) { - $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE ); + public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) { + $this->init( $map, $algo, $ejections ); + } + + /** + * @param int[] $map Map of (location => integer) + * @param string $algo Hashing algorithm + * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires + */ + protected function init( array $map, $algo, array $ejections ) { + if ( !in_array( $algo, hash_algos(), true ) ) { + throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." ); } - // Build a ring of RING_SIZE spots, with each location at a spot in location hash order - $index = 0; - foreach ( $standardMap as $location => $weight ) { - // Location covers half-closed interval [$index,$index + $weight) - $this->ring[$location] = [ $index, $index + $weight ]; - $index += $weight; + + $weightByLocation = array_filter( $map ); + if ( $weightByLocation === [] ) { + throw new UnexpectedValueException( "No locations with non-zero weight." ); + } elseif ( min( $map ) < 0 ) { + throw new InvalidArgumentException( "Location weight cannot be negative." ); } - // Make sure the last location covers what is left - end( $this->ring ); - $this->ring[key( $this->ring )][1] = self::RING_SIZE; + + $this->algo = $algo; + $this->weightByLocation = $weightByLocation; + $this->ejectExpiryByLocation = $ejections; + $this->baseRing = $this->buildLocationRing( $this->weightByLocation, $this->algo ); } /** @@ -82,11 +107,10 @@ class HashRing { * * @param string $item * @return string Location + * @throws UnexpectedValueException */ final public function getLocation( $item ) { - $locations = $this->getLocations( $item, 1 ); - - return $locations[0]; + return $this->getLocations( $item, 1 )[0]; } /** @@ -94,46 +118,56 @@ class HashRing { * * @param string $item * @param int $limit Maximum number of locations to return - * @return array List of locations + * @param int $from One of the RING_* class constants + * @return string[] List of locations + * @throws UnexpectedValueException */ - public function getLocations( $item, $limit ) { - $locations = []; - $primaryLocation = null; - $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits - foreach ( $this->ring as $location => $range ) { - if ( count( $locations ) >= $limit ) { - break; - } - // The $primaryLocation is the location the item spot is in. - // After that is reached, keep appending the next locations. - if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) { - if ( $primaryLocation === null ) { - $primaryLocation = $location; - } - $locations[] = $location; - } + public function getLocations( $item, $limit, $from = self::RING_ALL ) { + if ( $from === self::RING_ALL ) { + $ring = $this->baseRing; + } elseif ( $from === self::RING_LIVE ) { + $ring = $this->getLiveRing(); + } else { + throw new InvalidArgumentException( "Invalid ring source specified." ); + } + + // Locate this item's position on the hash ring + $position = $this->getItemPosition( $item ); + + // Guess a nearby node based on the node list being ordered and the probabilistic + // expected size of nodes being equal, varying less when with higher node counts + $guessIndex = $this->guessNodeIndexForPosition( $position, $ring ); + + // Find the index of the node within which this item resides + $itemNodeIndex = $this->findNodeIndexForPosition( $position, $guessIndex, $ring ); + if ( $itemNodeIndex === null ) { + throw new RuntimeException( __METHOD__ . ": no place for '$item' ($position)" ); } - // If more locations are requested, wrap-around and keep adding them - reset( $this->ring ); + + $locations = []; + $currentIndex = $itemNodeIndex; while ( count( $locations ) < $limit ) { - $location = key( $this->ring ); - if ( $location === $primaryLocation ) { - break; // don't go in circles + $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION]; + if ( !in_array( $nodeLocation, $locations, true ) ) { + // Ignore other nodes for the same locations already added + $locations[] = $nodeLocation; + } + $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring ); + if ( $currentIndex === $itemNodeIndex ) { + break; // all nodes visited } - $locations[] = $location; - next( $this->ring ); } return $locations; } /** - * Get the map of locations to weight (ignores 0-weight items) + * Get the map of locations to weight (does not include zero weight items) * - * @return array + * @return int[] */ public function getLocationWeights() { - return $this->sourceMap; + return $this->weightByLocation; } /** @@ -142,48 +176,282 @@ class HashRing { * @param string $location * @param int $ttl Seconds * @return bool Whether some non-ejected locations are left + * @throws UnexpectedValueException */ public function ejectFromLiveRing( $location, $ttl ) { - if ( !isset( $this->sourceMap[$location] ) ) { + if ( !isset( $this->weightByLocation[$location] ) ) { throw new UnexpectedValueException( "No location '$location' in the ring." ); } - $expiry = time() + $ttl; - $this->liveRing = null; // stale - $this->ejectionExpiries[$location] = $expiry; - $this->ejectionNextExpiry = min( $expiry, $this->ejectionNextExpiry ); - return ( count( $this->ejectionExpiries ) < count( $this->sourceMap ) ); + $expiry = $this->getCurrentTime() + $ttl; + $this->ejectExpiryByLocation[$location] = $expiry; + + $this->liveRing = null; // invalidate ring cache + + return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) ); + } + + /** + * Get the location of an item on the "live" ring + * + * @param string $item + * @return string Location + * @throws UnexpectedValueException + */ + final public function getLiveLocation( $item ) { + return $this->getLocations( $item, 1, self::RING_LIVE )[0]; + } + + /** + * Get the location of an item on the "live" ring, as well as the next locations + * + * @param string $item + * @param int $limit Maximum number of locations to return + * @return string[] List of locations + * @throws UnexpectedValueException + */ + final public function getLiveLocations( $item, $limit ) { + return $this->getLocations( $item, $limit, self::RING_LIVE ); + } + + /** + * Get the map of "live" locations to weight (does not include zero weight items) + * + * @return int[] + * @throws UnexpectedValueException + */ + public function getLiveLocationWeights() { + $now = $this->getCurrentTime(); + + return array_diff_key( + $this->weightByLocation, + array_filter( + $this->ejectExpiryByLocation, + function ( $expiry ) use ( $now ) { + return ( $expiry > $now ); + } + ) + ); + } + + /** + * @param float $position + * @param array[] $ring Either the base or live ring + * @return int + */ + private function guessNodeIndexForPosition( $position, $ring ) { + $arcRatio = $position / self::RING_SIZE; // range is [0.0, 1.0) + $maxIndex = count( $ring ) - 1; + $guessIndex = intval( $maxIndex * $arcRatio ); + + $displacement = $ring[$guessIndex][self::KEY_POS] - $position; + $aveSize = self::RING_SIZE / count( $ring ); + $shift = intval( $displacement / $aveSize ); + + $guessIndex -= $shift; + if ( $guessIndex < 0 ) { + $guessIndex = max( $maxIndex + $guessIndex, 0 ); // roll-over + } elseif ( $guessIndex > $maxIndex ) { + $guessIndex = min( $guessIndex - $maxIndex, 0 ); // roll-over + } + + return $guessIndex; + } + + /** + * @param float $position + * @param int $guessIndex Node index to start scanning + * @param array[] $ring Either the base or live ring + * @return int|null + */ + private function findNodeIndexForPosition( $position, $guessIndex, $ring ) { + $mainNodeIndex = null; // first matching node index + + $this->lastNodeScanSize = 0; + + if ( $ring[$guessIndex][self::KEY_POS] >= $position ) { + // Walk the nodes counter-clockwise until reaching a node at/before $position + do { + $priorIndex = $guessIndex; + $guessIndex = $this->getPrevClockwiseNodeIndex( $guessIndex, $ring ); + $nodePosition = $ring[$guessIndex][self::KEY_POS]; + if ( $nodePosition < $position || $guessIndex > $priorIndex ) { + $mainNodeIndex = $priorIndex; // includes roll-over case + } elseif ( $nodePosition === $position ) { + $mainNodeIndex = $guessIndex; + } + ++$this->lastNodeScanSize; + } while ( $mainNodeIndex === null ); + } else { + // Walk the nodes clockwise until reaching a node at/after $position + do { + $priorIndex = $guessIndex; + $guessIndex = $this->getNextClockwiseNodeIndex( $guessIndex, $ring ); + $nodePosition = $ring[$guessIndex][self::KEY_POS]; + if ( $nodePosition >= $position || $guessIndex < $priorIndex ) { + $mainNodeIndex = $guessIndex; // includes roll-over case + } + ++$this->lastNodeScanSize; + } while ( $mainNodeIndex === null ); + } + + return $mainNodeIndex; + } + + /** + * @param int[] $weightByLocation + * @param string $algo Hashing algorithm + * @return array[] + */ + private function buildLocationRing( array $weightByLocation, $algo ) { + $locationCount = count( $weightByLocation ); + $totalWeight = array_sum( $weightByLocation ); + + $ring = []; + // Assign nodes to all locations based on location weight + $claimed = []; // (position as string => (node, index)) + foreach ( $weightByLocation as $location => $weight ) { + $ratio = $weight / $totalWeight; + // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available; + // assign a few groups of nodes to this location based on its weight. + $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount ); + for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) { + // For efficiency, get 4 points per hash call and 4X node count. + // If $algo is MD5, then this matches that of with libketama. + // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c + $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" ); + foreach ( $positions as $gi => $position ) { + $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location"; + $posKey = (string)$position; // large integer + if ( isset( $claimed[$posKey] ) ) { + // Disallow duplicates for sanity (name decides precedence) + if ( $claimed[$posKey]['node'] > $node ) { + continue; + } else { + unset( $ring[$claimed[$posKey]['index']] ); + } + } + $ring[] = [ + self::KEY_POS => $position, + self::KEY_LOCATION => $location + ]; + $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ]; + } + } + } + // Sort the locations into clockwise order based on the hash ring position + usort( $ring, function ( $a, $b ) { + if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) { + throw new UnexpectedValueException( 'Duplicate node positions.' ); + } + + return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 ); + } ); + + return $ring; + } + + /** + * @param string $item Key + * @return float Ring position; integral number in [0, self::RING_SIZE - 1] + */ + private function getItemPosition( $item ) { + // If $algo is MD5, then this matches that of with libketama. + // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c + $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 ); + if ( strlen( $octets ) != 4 ) { + throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." ); + } + + return (float)sprintf( '%u', unpack( 'V', $octets )[1] ); + } + + /** + * @param string $nodeGroupName + * @return float[] Four ring positions on [0, self::RING_SIZE - 1] + */ + private function getNodePositionQuartet( $nodeGroupName ) { + $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 ); + if ( strlen( $octets ) != 16 ) { + throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." ); + } + + $positions = []; + foreach ( unpack( 'V4', $octets ) as $signed ) { + $positions[] = (float)sprintf( '%u', $signed ); + } + + return $positions; + } + + /** + * @param int $i Valid index for a node in the ring + * @param array[] $ring Either the base or live ring + * @return int Valid index for a node in the ring + */ + private function getNextClockwiseNodeIndex( $i, $ring ) { + if ( !isset( $ring[$i] ) ) { + throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." ); + } + + $next = $i + 1; + + return ( $next < count( $ring ) ) ? $next : 0; + } + + /** + * @param int $i Valid index for a node in the ring + * @param array[] $ring Either the base or live ring + * @return int Valid index for a node in the ring + */ + private function getPrevClockwiseNodeIndex( $i, $ring ) { + if ( !isset( $ring[$i] ) ) { + throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." ); + } + + $prev = $i - 1; + + return ( $prev >= 0 ) ? $prev : count( $ring ) - 1; } /** * Get the "live" hash ring (which does not include ejected locations) * - * @return HashRing + * @return array[] * @throws UnexpectedValueException */ protected function getLiveRing() { - $now = time(); - if ( $this->liveRing === null || $this->ejectionNextExpiry <= $now ) { - $this->ejectionExpiries = array_filter( - $this->ejectionExpiries, + if ( !$this->ejectExpiryByLocation ) { + return $this->baseRing; // nothing ejected + } + + $now = $this->getCurrentTime(); + + if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) { + // Live ring needs to be regerenated... + $this->ejectExpiryByLocation = array_filter( + $this->ejectExpiryByLocation, function ( $expiry ) use ( $now ) { return ( $expiry > $now ); } ); - if ( count( $this->ejectionExpiries ) ) { - $map = array_diff_key( $this->sourceMap, $this->ejectionExpiries ); - $this->liveRing = count( $map ) ? new self( $map ) : false; - - $this->ejectionNextExpiry = min( $this->ejectionExpiries ); - } else { // common case; avoid recalculating ring - $this->liveRing = clone $this; - $this->liveRing->ejectionExpiries = []; - $this->liveRing->ejectionNextExpiry = INF; - $this->liveRing->liveRing = null; - - $this->ejectionNextExpiry = INF; + + if ( count( $this->ejectExpiryByLocation ) ) { + // Some locations are still ejected from the ring + $liveRing = []; + foreach ( $this->baseRing as $i => $nodeInfo ) { + $location = $nodeInfo[self::KEY_LOCATION]; + if ( !isset( $this->ejectExpiryByLocation[$location] ) ) { + $liveRing[] = $nodeInfo; + } + } + } else { + $liveRing = $this->baseRing; } + + $this->liveRing = $liveRing; } + if ( !$this->liveRing ) { throw new UnexpectedValueException( "The live ring is currently empty." ); } @@ -192,35 +460,33 @@ class HashRing { } /** - * Get the location of an item on the "live" ring - * - * @param string $item - * @return string Location - * @throws UnexpectedValueException + * @return int UNIX timestamp */ - public function getLiveLocation( $item ) { - return $this->getLiveRing()->getLocation( $item ); + protected function getCurrentTime() { + return time(); } /** - * Get the location of an item on the "live" ring, as well as the next locations - * - * @param string $item - * @param int $limit Maximum number of locations to return - * @return array List of locations - * @throws UnexpectedValueException + * @return int|null */ - public function getLiveLocations( $item, $limit ) { - return $this->getLiveRing()->getLocations( $item, $limit ); + public function getLastNodeScanSize() { + return $this->lastNodeScanSize; } - /** - * Get the map of "live" locations to weight (ignores 0-weight items) - * - * @return array - * @throws UnexpectedValueException - */ - public function getLiveLocationWeights() { - return $this->getLiveRing()->getLocationWeights(); + public function serialize() { + return serialize( [ + 'algorithm' => $this->algo, + 'locations' => $this->weightByLocation, + 'ejections' => $this->ejectExpiryByLocation + ] ); + } + + public function unserialize( $serialized ) { + $data = unserialize( $serialized ); + if ( is_array( $data ) ) { + $this->init( $data['locations'], $data['algorithm'], $data['ejections'] ); + } else { + throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." ); + } } } diff --git a/includes/poolcounter/PoolCounterRedis.php b/includes/poolcounter/PoolCounterRedis.php index 9515f25dd8..f5fa4c7319 100644 --- a/includes/poolcounter/PoolCounterRedis.php +++ b/includes/poolcounter/PoolCounterRedis.php @@ -85,7 +85,9 @@ class PoolCounterRedis extends PoolCounter { parent::__construct( $conf, $type, $key ); $this->serversByLabel = $conf['servers']; - $this->ring = new HashRing( array_fill_keys( array_keys( $conf['servers'] ), 100 ) ); + + $serverLabels = array_keys( $conf['servers'] ); + $this->ring = new HashRing( array_fill_keys( $serverLabels, 10 ) ); $conf['redisConfig']['serializer'] = 'none'; // for use with Lua $this->pool = RedisConnectionPool::singleton( $conf['redisConfig'] ); diff --git a/tests/phpunit/includes/libs/HashRingTest.php b/tests/phpunit/includes/libs/HashRingTest.php index ba288281f6..1e51aa8067 100644 --- a/tests/phpunit/includes/libs/HashRingTest.php +++ b/tests/phpunit/includes/libs/HashRingTest.php @@ -2,44 +2,74 @@ /** * @group HashRing + * @covers HashRing */ class HashRingTest extends PHPUnit\Framework\TestCase { use MediaWikiCoversValidator; - /** - * @covers HashRing - */ - public function testHashRing() { - $ring = new HashRing( [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3 ] ); + public function testHashRingSerialize() { + $map = [ 's1' => 3, 's2' => 10, 's3' => 2, 's4' => 10, 's5' => 2, 's6' => 3 ]; + $ring = new HashRing( $map, 'md5' ); + + $serialized = serialize( $ring ); + $ringRemade = unserialize( $serialized ); + + for ( $i = 0; $i < 100; $i++ ) { + $this->assertEquals( + $ring->getLocation( "hello$i" ), + $ringRemade->getLocation( "hello$i" ), + 'Items placed at proper locations' + ); + } + } + + public function testHashRingMapping() { + // SHA-1 based and weighted + $ring = new HashRing( + [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3, 's7' => 0 ], + 'sha1' + ); + + $this->assertEquals( + [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3 ], + $ring->getLocationWeights(), + 'Normalized location weights' + ); + + $this->assertEquals( null, $ring->getLastNodeScanSize() ); $locations = []; - for ( $i = 0; $i < 20; $i++ ) { + for ( $i = 0; $i < 25; $i++ ) { $locations[ "hello$i"] = $ring->getLocation( "hello$i" ); } $expectedLocations = [ - "hello0" => "s5", + "hello0" => "s4", "hello1" => "s6", - "hello2" => "s2", - "hello3" => "s5", + "hello2" => "s3", + "hello3" => "s6", "hello4" => "s6", "hello5" => "s4", - "hello6" => "s5", + "hello6" => "s3", "hello7" => "s4", - "hello8" => "s5", - "hello9" => "s5", + "hello8" => "s3", + "hello9" => "s3", "hello10" => "s3", - "hello11" => "s6", - "hello12" => "s1", - "hello13" => "s3", - "hello14" => "s3", + "hello11" => "s5", + "hello12" => "s4", + "hello13" => "s5", + "hello14" => "s2", "hello15" => "s5", - "hello16" => "s4", - "hello17" => "s6", - "hello18" => "s6", - "hello19" => "s3" + "hello16" => "s6", + "hello17" => "s5", + "hello18" => "s1", + "hello19" => "s1", + "hello20" => "s6", + "hello21" => "s5", + "hello22" => "s3", + "hello23" => "s4", + "hello24" => "s1" ]; - $this->assertEquals( $expectedLocations, $locations, 'Items placed at proper locations' ); $locations = []; @@ -48,12 +78,273 @@ class HashRingTest extends PHPUnit\Framework\TestCase { } $expectedLocations = [ - "hello0" => [ "s5", "s6" ], - "hello1" => [ "s6", "s4" ], - "hello2" => [ "s2", "s1" ], - "hello3" => [ "s5", "s6" ], - "hello4" => [ "s6", "s4" ], + "hello0" => [ "s4", "s5" ], + "hello1" => [ "s6", "s5" ], + "hello2" => [ "s3", "s1" ], + "hello3" => [ "s6", "s5" ], + "hello4" => [ "s6", "s3" ], ]; $this->assertEquals( $expectedLocations, $locations, 'Items placed at proper locations' ); } + + /** + * @dataProvider providor_getHashLocationWeights + */ + public function testHashRingRatios( $locations, $expectedHits ) { + $ring = new HashRing( $locations, 'whirlpool' ); + + $locationStats = array_fill_keys( array_keys( $locations ), 0 ); + for ( $i = 0; $i < 10000; ++$i ) { + ++$locationStats[$ring->getLocation( "key-$i" )]; + } + $this->assertEquals( $expectedHits, $locationStats ); + } + + public static function providor_getHashLocationWeights() { + return [ + [ + [ 'big' => 10, 'medium' => 5, 'small' => 1 ], + [ 'big' => 6037, 'medium' => 3314, 'small' => 649 ] + ] + ]; + } + + /** + * @dataProvider providor_getHashLocationWeights2 + */ + public function testHashRingRatios2( $locations, $expected ) { + $ring = new HashRing( $locations, 'sha1' ); + $locationStats = array_fill_keys( array_keys( $locations ), 0 ); + for ( $i = 0; $i < 1000; ++$i ) { + foreach ( $ring->getLocations( "key-$i", 3 ) as $location ) { + ++$locationStats[$location]; + } + } + $this->assertEquals( $expected, $locationStats ); + } + + public static function providor_getHashLocationWeights2() { + return [ + [ + [ 'big1' => 10, 'big2' => 10, 'big3' => 10, 'small1' => 1, 'small2' => 1 ], + [ 'big1' => 929, 'big2' => 899, 'big3' => 887, 'small1' => 143, 'small2' => 142 ] + ] + ]; + } + + public function testBigHashRingRatios() { + $locations = []; + for ( $i = 0; $i < 128; ++$i ) { + $locations["server$i"] = 100; + } + + $ring = new HashRing( $locations, 'md5' ); + + $scans = []; + for ( $i = 0; $i < 1000; ++$i ) { + $ring->getLocation( "item$i" ); + $scans[] = $ring->getLastNodeScanSize(); + } + + $this->assertEquals( 1, min( $scans ) ); + $this->assertEquals( 24, max( $scans ) ); + // Note: log2( 140 * 128) = 14.129 (e.g. divide & conquer) + $this->assertEquals( 4.4, round( array_sum( $scans ) / count( $scans ), 1 ) ); + } + + public function testHashRingEjection() { + $map = [ 's1' => 5, 's2' => 5, 's3' => 10, 's4' => 10, 's5' => 5, 's6' => 5 ]; + $ring = new HashRing( $map, 'md5' ); + + $ring->ejectFromLiveRing( 's3', 30 ); + $ring->ejectFromLiveRing( 's6', 15 ); + + $this->assertEquals( + [ 's1' => 5, 's2' => 5, 's4' => 10, 's5' => 5 ], + $ring->getLiveLocationWeights(), + 'Live location weights' + ); + + for ( $i = 0; $i < 100; ++$i ) { + $key = "key-$i"; + + $this->assertNotEquals( 's3', $ring->getLiveLocation( $key ), 'ejected' ); + $this->assertNotEquals( 's6', $ring->getLiveLocation( $key ), 'ejected' ); + + if ( !in_array( $ring->getLocation( $key ), [ 's3', 's6' ], true ) ) { + $this->assertEquals( + $ring->getLocation( $key ), + $ring->getLiveLocation( $key ), + "Live ring otherwise matches (#$i)" + ); + $this->assertEquals( + $ring->getLocations( $key, 1 ), + $ring->getLiveLocations( $key, 1 ), + "Live ring otherwise matches (#$i)" + ); + } + } + } + + public function testHashRingCollision() { + $ring1 = new HashRing( [ 0 => 1, 6497 => 1 ] ); + $ring2 = new HashRing( [ 6497 => 1, 0 => 1 ] ); + + for ( $i = 0; $i < 100; ++$i ) { + $this->assertEquals( $ring1->getLocation( $i ), $ring2->getLocation( $i ) ); + } + } + + public function testHashRingKetamaMode() { + // Same as https://github.com/RJ/ketama/blob/master/ketama.servers + $map = [ + '10.0.1.1:11211' => 600, + '10.0.1.2:11211' => 300, + '10.0.1.3:11211' => 200, + '10.0.1.4:11211' => 350, + '10.0.1.5:11211' => 1000, + '10.0.1.6:11211' => 800, + '10.0.1.7:11211' => 950, + '10.0.1.8:11211' => 100 + ]; + $ring = new HashRing( $map, 'md5' ); + $wrapper = \Wikimedia\TestingAccessWrapper::newFromObject( $ring ); + + $ketama_test = function ( $count ) use ( $wrapper ) { + $baseRing = $wrapper->baseRing; + + $lines = []; + for ( $key = 0; $key < $count; ++$key ) { + $location = $wrapper->getLocation( $key ); + + $itemPos = $wrapper->getItemPosition( $key ); + $guess = $wrapper->guessNodeIndexForPosition( $itemPos, $baseRing ); + $nodeIndex = $wrapper->findNodeIndexForPosition( $itemPos, $guess, $baseRing ); + $nodePos = $baseRing[$nodeIndex][HashRing::KEY_POS]; + + $lines[] = sprintf( "%u %u %s\n", $itemPos, $nodePos, $location ); + } + + return "\n" . implode( '', $lines ); + }; + + // Known correct values generated from C code: + // https://github.com/RJ/ketama/blob/master/libketama/ketama_test.c + $expected = <<assertEquals( $expected, $ketama_test( 100 ), 'Ketama mode (diff check)' ); + + // Hash of known correct values from C code + $this->assertEquals( + 'c69ac9eb7a8a630c0cded201cefeaace', + md5( $ketama_test( 1e5 ) ), + 'Ketama mode (large, MD5 check)' + ); + + // Slower, full upstream MD5 check, manually verified 3/21/2018 + // $this->assertEquals( '5672b131391f5aa2b280936aec1eea74', md5( $ketama_test( 1e6 ) ) ); + } }