Rewrite HashRing to use consistent hashing
authorAaron Schulz <aschulz@wikimedia.org>
Thu, 8 Mar 2018 03:03:32 +0000 (19:03 -0800)
committerAaron Schulz <aschulz@wikimedia.org>
Thu, 7 Jun 2018 10:36:22 +0000 (03:36 -0700)
This scheme avoids disruptive re-hashing when a node is
ejected; only the adjectent clockwise sector is affected,
the later taking responsibility for the former. Addition
of new nodes is likewise less disruptive.

When used with MD5, this maps keys the same as libketama.

Also:
* Make HashRing::getLocations() faster by guessing the
  sector list index and moving clockwise/counter-clockwise.
* Made HashRing Serializable for more robust storage.
* Allow for different hash functions to be specified.

Note:
* This makes PoolCounterRedis use the new hash scheme.
* Likewise for JobQueueFederated (for job de-duplication).
  Switching from one mode to the other will cause some of
  duplicated effort from old jobs only temporarily.

Change-Id: Iff432a4e5dc3fb40b2dda70fb3dbe9dda0b74933

includes/libs/HashRing.php
includes/poolcounter/PoolCounterRedis.php
tests/phpunit/includes/libs/HashRingTest.php

index 3b9c24d..501cbcd 100644 (file)
 /**
  * Convenience class for weighted consistent hash rings
  *
+ * This deterministically maps "keys" to a set of "locations" while avoiding clumping
+ *
+ * Each location is represented by a number of nodes on a ring proportionate to the ratio
+ * of its weight compared to the total location weight. Note positions are deterministically
+ * derived from the hash of the location name. Nodes are responsible for the portion of the
+ * ring, counter-clockwise, up until the next node. Locations are responsible for all portions
+ * of the ring that the location's nodes are responsible for.
+ *
+ * A location that is temporarily "ejected" is said to be absent from the "live" ring.
+ * If no location ejections are active, then the base ring and live ring are identical.
+ *
  * @since 1.22
  */
-class HashRing {
-       /** @var array (location => weight) */
-       protected $sourceMap = [];
-       /** @var array (location => (start, end)) */
-       protected $ring = [];
+class HashRing implements Serializable {
+       /** @var string Hashing algorithm for hash() */
+       protected $algo;
+       /** @var int[] Non-empty (location => integer weight) */
+       protected $weightByLocation;
+       /** @var int[] Map of (location => UNIX timestamp) */
+       protected $ejectExpiryByLocation;
 
-       /** @var HashRing|null */
+       /** @var array[] Non-empty list of (float, node name, location name) */
+       protected $baseRing;
+       /** @var array[] Non-empty list of (float, node name, location name) */
        protected $liveRing;
-       /** @var array (location => UNIX timestamp) */
-       protected $ejectionExpiries = [];
-       /** @var int UNIX timestamp */
-       protected $ejectionNextExpiry = INF;
 
-       const RING_SIZE = 268435456; // 2^28
+       /** @var int|null Number of nodes scanned to place an item last time */
+       private $lastNodeScanSize;
+
+       /** @var float Number of positions on the ring */
+       const RING_SIZE = 4294967296.0; // 2^32
+       /** @var integer Overall number of node groups per server */
+       const HASHES_PER_LOCATION = 40;
+       /** @var integer Number of nodes in a node group */
+       const SECTORS_PER_HASH = 4;
+
+       const KEY_POS = 0;
+       const KEY_LOCATION = 1;
+
+       /** @var int Consider all locations */
+       const RING_ALL = 0;
+       /** @var int Only consider "live" locations */
+       const RING_LIVE = 1;
 
        /**
-        * @param array $map (location => weight)
+        * Make a consistent hash ring given a set of locations and their weight values
+        *
+        * @param int[] $map Map of (location => weight)
+        * @param string $algo Hashing algorithm listed in hash_algos() [optional]
+        * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries
+        * @since 1.31
         */
-       public function __construct( array $map ) {
-               $map = array_filter( $map, function ( $w ) {
-                       return $w > 0;
-               } );
-               if ( !count( $map ) ) {
-                       throw new UnexpectedValueException( "Ring is empty or all weights are zero." );
-               }
-               $this->sourceMap = $map;
-               // Sort the locations based on the hash of their names
-               $hashes = [];
-               foreach ( $map as $location => $weight ) {
-                       $hashes[$location] = sha1( $location );
-               }
-               uksort( $map, function ( $a, $b ) use ( $hashes ) {
-                       return strcmp( $hashes[$a], $hashes[$b] );
-               } );
-               // Fit the map to weight-proportionate one with a space of size RING_SIZE
-               $sum = array_sum( $map );
-               $standardMap = [];
-               foreach ( $map as $location => $weight ) {
-                       $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE );
+       public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) {
+               $this->init( $map, $algo, $ejections );
+       }
+
+       /**
+        * @param int[] $map Map of (location => integer)
+        * @param string $algo Hashing algorithm
+        * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires
+        */
+       protected function init( array $map, $algo, array $ejections ) {
+               if ( !in_array( $algo, hash_algos(), true ) ) {
+                       throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." );
                }
-               // Build a ring of RING_SIZE spots, with each location at a spot in location hash order
-               $index = 0;
-               foreach ( $standardMap as $location => $weight ) {
-                       // Location covers half-closed interval [$index,$index + $weight)
-                       $this->ring[$location] = [ $index, $index + $weight ];
-                       $index += $weight;
+
+               $weightByLocation = array_filter( $map );
+               if ( $weightByLocation === [] ) {
+                       throw new UnexpectedValueException( "No locations with non-zero weight." );
+               } elseif ( min( $map ) < 0 ) {
+                       throw new InvalidArgumentException( "Location weight cannot be negative." );
                }
-               // Make sure the last location covers what is left
-               end( $this->ring );
-               $this->ring[key( $this->ring )][1] = self::RING_SIZE;
+
+               $this->algo = $algo;
+               $this->weightByLocation = $weightByLocation;
+               $this->ejectExpiryByLocation = $ejections;
+               $this->baseRing = $this->buildLocationRing( $this->weightByLocation, $this->algo );
        }
 
        /**
@@ -82,11 +107,10 @@ class HashRing {
         *
         * @param string $item
         * @return string Location
+        * @throws UnexpectedValueException
         */
        final public function getLocation( $item ) {
-               $locations = $this->getLocations( $item, 1 );
-
-               return $locations[0];
+               return $this->getLocations( $item, 1 )[0];
        }
 
        /**
@@ -94,46 +118,56 @@ class HashRing {
         *
         * @param string $item
         * @param int $limit Maximum number of locations to return
-        * @return array List of locations
+        * @param int $from One of the RING_* class constants
+        * @return string[] List of locations
+        * @throws UnexpectedValueException
         */
-       public function getLocations( $item, $limit ) {
-               $locations = [];
-               $primaryLocation = null;
-               $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits
-               foreach ( $this->ring as $location => $range ) {
-                       if ( count( $locations ) >= $limit ) {
-                               break;
-                       }
-                       // The $primaryLocation is the location the item spot is in.
-                       // After that is reached, keep appending the next locations.
-                       if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) {
-                               if ( $primaryLocation === null ) {
-                                       $primaryLocation = $location;
-                               }
-                               $locations[] = $location;
-                       }
+       public function getLocations( $item, $limit, $from = self::RING_ALL ) {
+               if ( $from === self::RING_ALL ) {
+                       $ring = $this->baseRing;
+               } elseif ( $from === self::RING_LIVE ) {
+                       $ring = $this->getLiveRing();
+               } else {
+                       throw new InvalidArgumentException( "Invalid ring source specified." );
+               }
+
+               // Locate this item's position on the hash ring
+               $position = $this->getItemPosition( $item );
+
+               // Guess a nearby node based on the node list being ordered and the probabilistic
+               // expected size of nodes being equal, varying less when with higher node counts
+               $guessIndex = $this->guessNodeIndexForPosition( $position, $ring );
+
+               // Find the index of the node within which this item resides
+               $itemNodeIndex = $this->findNodeIndexForPosition( $position, $guessIndex, $ring );
+               if ( $itemNodeIndex === null ) {
+                       throw new RuntimeException( __METHOD__ . ": no place for '$item' ($position)" );
                }
-               // If more locations are requested, wrap-around and keep adding them
-               reset( $this->ring );
+
+               $locations = [];
+               $currentIndex = $itemNodeIndex;
                while ( count( $locations ) < $limit ) {
-                       $location = key( $this->ring );
-                       if ( $location === $primaryLocation ) {
-                               break; // don't go in circles
+                       $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION];
+                       if ( !in_array( $nodeLocation, $locations, true ) ) {
+                               // Ignore other nodes for the same locations already added
+                               $locations[] = $nodeLocation;
+                       }
+                       $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring );
+                       if ( $currentIndex === $itemNodeIndex ) {
+                               break; // all nodes visited
                        }
-                       $locations[] = $location;
-                       next( $this->ring );
                }
 
                return $locations;
        }
 
        /**
-        * Get the map of locations to weight (ignores 0-weight items)
+        * Get the map of locations to weight (does not include zero weight items)
         *
-        * @return array
+        * @return int[]
         */
        public function getLocationWeights() {
-               return $this->sourceMap;
+               return $this->weightByLocation;
        }
 
        /**
@@ -142,48 +176,282 @@ class HashRing {
         * @param string $location
         * @param int $ttl Seconds
         * @return bool Whether some non-ejected locations are left
+        * @throws UnexpectedValueException
         */
        public function ejectFromLiveRing( $location, $ttl ) {
-               if ( !isset( $this->sourceMap[$location] ) ) {
+               if ( !isset( $this->weightByLocation[$location] ) ) {
                        throw new UnexpectedValueException( "No location '$location' in the ring." );
                }
-               $expiry = time() + $ttl;
-               $this->liveRing = null; // stale
-               $this->ejectionExpiries[$location] = $expiry;
-               $this->ejectionNextExpiry = min( $expiry, $this->ejectionNextExpiry );
 
-               return ( count( $this->ejectionExpiries ) < count( $this->sourceMap ) );
+               $expiry = $this->getCurrentTime() + $ttl;
+               $this->ejectExpiryByLocation[$location] = $expiry;
+
+               $this->liveRing = null; // invalidate ring cache
+
+               return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) );
+       }
+
+       /**
+        * Get the location of an item on the "live" ring
+        *
+        * @param string $item
+        * @return string Location
+        * @throws UnexpectedValueException
+        */
+       final public function getLiveLocation( $item ) {
+               return $this->getLocations( $item, 1, self::RING_LIVE )[0];
+       }
+
+       /**
+        * Get the location of an item on the "live" ring, as well as the next locations
+        *
+        * @param string $item
+        * @param int $limit Maximum number of locations to return
+        * @return string[] List of locations
+        * @throws UnexpectedValueException
+        */
+       final public function getLiveLocations( $item, $limit ) {
+               return $this->getLocations( $item, $limit, self::RING_LIVE );
+       }
+
+       /**
+        * Get the map of "live" locations to weight (does not include zero weight items)
+        *
+        * @return int[]
+        * @throws UnexpectedValueException
+        */
+       public function getLiveLocationWeights() {
+               $now = $this->getCurrentTime();
+
+               return array_diff_key(
+                       $this->weightByLocation,
+                       array_filter(
+                               $this->ejectExpiryByLocation,
+                               function ( $expiry ) use ( $now ) {
+                                       return ( $expiry > $now );
+                               }
+                       )
+               );
+       }
+
+       /**
+        * @param float $position
+        * @param array[] $ring Either the base or live ring
+        * @return int
+        */
+       private function guessNodeIndexForPosition( $position, $ring ) {
+               $arcRatio = $position / self::RING_SIZE; // range is [0.0, 1.0)
+               $maxIndex = count( $ring ) - 1;
+               $guessIndex = intval( $maxIndex * $arcRatio );
+
+               $displacement = $ring[$guessIndex][self::KEY_POS] - $position;
+               $aveSize = self::RING_SIZE / count( $ring );
+               $shift = intval( $displacement / $aveSize );
+
+               $guessIndex -= $shift;
+               if ( $guessIndex < 0 ) {
+                       $guessIndex = max( $maxIndex + $guessIndex, 0 ); // roll-over
+               } elseif ( $guessIndex > $maxIndex ) {
+                       $guessIndex = min( $guessIndex - $maxIndex, 0 ); // roll-over
+               }
+
+               return $guessIndex;
+       }
+
+       /**
+        * @param float $position
+        * @param int $guessIndex Node index to start scanning
+        * @param array[] $ring Either the base or live ring
+        * @return int|null
+        */
+       private function findNodeIndexForPosition( $position, $guessIndex, $ring ) {
+               $mainNodeIndex = null; // first matching node index
+
+               $this->lastNodeScanSize = 0;
+
+               if ( $ring[$guessIndex][self::KEY_POS] >= $position ) {
+                       // Walk the nodes counter-clockwise until reaching a node at/before $position
+                       do {
+                               $priorIndex = $guessIndex;
+                               $guessIndex = $this->getPrevClockwiseNodeIndex( $guessIndex, $ring );
+                               $nodePosition = $ring[$guessIndex][self::KEY_POS];
+                               if ( $nodePosition < $position || $guessIndex > $priorIndex ) {
+                                       $mainNodeIndex = $priorIndex; // includes roll-over case
+                               } elseif ( $nodePosition === $position ) {
+                                       $mainNodeIndex = $guessIndex;
+                               }
+                               ++$this->lastNodeScanSize;
+                       } while ( $mainNodeIndex === null );
+               } else {
+                       // Walk the nodes clockwise until reaching a node at/after $position
+                       do {
+                               $priorIndex = $guessIndex;
+                               $guessIndex = $this->getNextClockwiseNodeIndex( $guessIndex, $ring );
+                               $nodePosition = $ring[$guessIndex][self::KEY_POS];
+                               if ( $nodePosition >= $position || $guessIndex < $priorIndex ) {
+                                       $mainNodeIndex = $guessIndex; // includes roll-over case
+                               }
+                               ++$this->lastNodeScanSize;
+                       } while ( $mainNodeIndex === null );
+               }
+
+               return $mainNodeIndex;
+       }
+
+       /**
+        * @param int[] $weightByLocation
+        * @param string $algo Hashing algorithm
+        * @return array[]
+        */
+       private function buildLocationRing( array $weightByLocation, $algo ) {
+               $locationCount = count( $weightByLocation );
+               $totalWeight = array_sum( $weightByLocation );
+
+               $ring = [];
+               // Assign nodes to all locations based on location weight
+               $claimed = []; // (position as string => (node, index))
+               foreach ( $weightByLocation as $location => $weight ) {
+                       $ratio = $weight / $totalWeight;
+                       // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available;
+                       // assign a few groups of nodes to this location based on its weight.
+                       $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount );
+                       for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) {
+                               // For efficiency, get 4 points per hash call and 4X node count.
+                               // If $algo is MD5, then this matches that of with libketama.
+                               // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
+                               $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" );
+                               foreach ( $positions as $gi => $position ) {
+                                       $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location";
+                                       $posKey = (string)$position; // large integer
+                                       if ( isset( $claimed[$posKey] ) ) {
+                                               // Disallow duplicates for sanity (name decides precedence)
+                                               if ( $claimed[$posKey]['node'] > $node ) {
+                                                       continue;
+                                               } else {
+                                                       unset( $ring[$claimed[$posKey]['index']] );
+                                               }
+                                       }
+                                       $ring[] = [
+                                               self::KEY_POS => $position,
+                                               self::KEY_LOCATION => $location
+                                       ];
+                                       $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ];
+                               }
+                       }
+               }
+               // Sort the locations into clockwise order based on the hash ring position
+               usort( $ring, function ( $a, $b ) {
+                       if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) {
+                               throw new UnexpectedValueException( 'Duplicate node positions.' );
+                       }
+
+                       return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 );
+               } );
+
+               return $ring;
+       }
+
+       /**
+        * @param string $item Key
+        * @return float Ring position; integral number in [0, self::RING_SIZE - 1]
+        */
+       private function getItemPosition( $item ) {
+               // If $algo is MD5, then this matches that of with libketama.
+               // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
+               $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 );
+               if ( strlen( $octets ) != 4 ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." );
+               }
+
+               return (float)sprintf( '%u', unpack( 'V', $octets )[1] );
+       }
+
+       /**
+        * @param string $nodeGroupName
+        * @return float[] Four ring positions on [0, self::RING_SIZE - 1]
+        */
+       private function getNodePositionQuartet( $nodeGroupName ) {
+               $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 );
+               if ( strlen( $octets ) != 16 ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." );
+               }
+
+               $positions = [];
+               foreach ( unpack( 'V4', $octets ) as $signed ) {
+                       $positions[] = (float)sprintf( '%u', $signed );
+               }
+
+               return $positions;
+       }
+
+       /**
+        * @param int $i Valid index for a node in the ring
+        * @param array[] $ring Either the base or live ring
+        * @return int Valid index for a node in the ring
+        */
+       private function getNextClockwiseNodeIndex( $i, $ring ) {
+               if ( !isset( $ring[$i] ) ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
+               }
+
+               $next = $i + 1;
+
+               return ( $next < count( $ring ) ) ? $next : 0;
+       }
+
+       /**
+        * @param int $i Valid index for a node in the ring
+        * @param array[] $ring Either the base or live ring
+        * @return int Valid index for a node in the ring
+        */
+       private function getPrevClockwiseNodeIndex( $i, $ring ) {
+               if ( !isset( $ring[$i] ) ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
+               }
+
+               $prev = $i - 1;
+
+               return ( $prev >= 0 ) ? $prev : count( $ring ) - 1;
        }
 
        /**
         * Get the "live" hash ring (which does not include ejected locations)
         *
-        * @return HashRing
+        * @return array[]
         * @throws UnexpectedValueException
         */
        protected function getLiveRing() {
-               $now = time();
-               if ( $this->liveRing === null || $this->ejectionNextExpiry <= $now ) {
-                       $this->ejectionExpiries = array_filter(
-                               $this->ejectionExpiries,
+               if ( !$this->ejectExpiryByLocation ) {
+                       return $this->baseRing; // nothing ejected
+               }
+
+               $now = $this->getCurrentTime();
+
+               if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) {
+                       // Live ring needs to be regerenated...
+                       $this->ejectExpiryByLocation = array_filter(
+                               $this->ejectExpiryByLocation,
                                function ( $expiry ) use ( $now ) {
                                        return ( $expiry > $now );
                                }
                        );
-                       if ( count( $this->ejectionExpiries ) ) {
-                               $map = array_diff_key( $this->sourceMap, $this->ejectionExpiries );
-                               $this->liveRing = count( $map ) ? new self( $map ) : false;
-
-                               $this->ejectionNextExpiry = min( $this->ejectionExpiries );
-                       } else { // common case; avoid recalculating ring
-                               $this->liveRing = clone $this;
-                               $this->liveRing->ejectionExpiries = [];
-                               $this->liveRing->ejectionNextExpiry = INF;
-                               $this->liveRing->liveRing = null;
-
-                               $this->ejectionNextExpiry = INF;
+
+                       if ( count( $this->ejectExpiryByLocation ) ) {
+                               // Some locations are still ejected from the ring
+                               $liveRing = [];
+                               foreach ( $this->baseRing as $i => $nodeInfo ) {
+                                       $location = $nodeInfo[self::KEY_LOCATION];
+                                       if ( !isset( $this->ejectExpiryByLocation[$location] ) ) {
+                                               $liveRing[] = $nodeInfo;
+                                       }
+                               }
+                       } else {
+                               $liveRing = $this->baseRing;
                        }
+
+                       $this->liveRing = $liveRing;
                }
+
                if ( !$this->liveRing ) {
                        throw new UnexpectedValueException( "The live ring is currently empty." );
                }
@@ -192,35 +460,33 @@ class HashRing {
        }
 
        /**
-        * Get the location of an item on the "live" ring
-        *
-        * @param string $item
-        * @return string Location
-        * @throws UnexpectedValueException
+        * @return int UNIX timestamp
         */
-       public function getLiveLocation( $item ) {
-               return $this->getLiveRing()->getLocation( $item );
+       protected function getCurrentTime() {
+               return time();
        }
 
        /**
-        * Get the location of an item on the "live" ring, as well as the next locations
-        *
-        * @param string $item
-        * @param int $limit Maximum number of locations to return
-        * @return array List of locations
-        * @throws UnexpectedValueException
+        * @return int|null
         */
-       public function getLiveLocations( $item, $limit ) {
-               return $this->getLiveRing()->getLocations( $item, $limit );
+       public function getLastNodeScanSize() {
+               return $this->lastNodeScanSize;
        }
 
-       /**
-        * Get the map of "live" locations to weight (ignores 0-weight items)
-        *
-        * @return array
-        * @throws UnexpectedValueException
-        */
-       public function getLiveLocationWeights() {
-               return $this->getLiveRing()->getLocationWeights();
+       public function serialize() {
+               return serialize( [
+                       'algorithm' => $this->algo,
+                       'locations' => $this->weightByLocation,
+                       'ejections' => $this->ejectExpiryByLocation
+               ] );
+       }
+
+       public function unserialize( $serialized ) {
+               $data = unserialize( $serialized );
+               if ( is_array( $data ) ) {
+                       $this->init( $data['locations'], $data['algorithm'], $data['ejections'] );
+               } else {
+                       throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." );
+               }
        }
 }
index 9515f25..f5fa4c7 100644 (file)
@@ -85,7 +85,9 @@ class PoolCounterRedis extends PoolCounter {
                parent::__construct( $conf, $type, $key );
 
                $this->serversByLabel = $conf['servers'];
-               $this->ring = new HashRing( array_fill_keys( array_keys( $conf['servers'] ), 100 ) );
+
+               $serverLabels = array_keys( $conf['servers'] );
+               $this->ring = new HashRing( array_fill_keys( $serverLabels, 10 ) );
 
                $conf['redisConfig']['serializer'] = 'none'; // for use with Lua
                $this->pool = RedisConnectionPool::singleton( $conf['redisConfig'] );
index ba28828..1e51aa8 100644 (file)
@@ -2,44 +2,74 @@
 
 /**
  * @group HashRing
+ * @covers HashRing
  */
 class HashRingTest extends PHPUnit\Framework\TestCase {
 
        use MediaWikiCoversValidator;
 
-       /**
-        * @covers HashRing
-        */
-       public function testHashRing() {
-               $ring = new HashRing( [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3 ] );
+       public function testHashRingSerialize() {
+               $map = [ 's1' => 3, 's2' => 10, 's3' => 2, 's4' => 10, 's5' => 2, 's6' => 3 ];
+               $ring = new HashRing( $map, 'md5' );
+
+               $serialized = serialize( $ring );
+               $ringRemade = unserialize( $serialized );
+
+               for ( $i = 0; $i < 100; $i++ ) {
+                       $this->assertEquals(
+                               $ring->getLocation( "hello$i" ),
+                               $ringRemade->getLocation( "hello$i" ),
+                               'Items placed at proper locations'
+                       );
+               }
+       }
+
+       public function testHashRingMapping() {
+               // SHA-1 based and weighted
+               $ring = new HashRing(
+                       [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3, 's7' => 0 ],
+                       'sha1'
+               );
+
+               $this->assertEquals(
+                       [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3 ],
+                       $ring->getLocationWeights(),
+                       'Normalized location weights'
+               );
+
+               $this->assertEquals( null, $ring->getLastNodeScanSize() );
 
                $locations = [];
-               for ( $i = 0; $i < 20; $i++ ) {
+               for ( $i = 0; $i < 25; $i++ ) {
                        $locations[ "hello$i"] = $ring->getLocation( "hello$i" );
                }
                $expectedLocations = [
-                       "hello0" => "s5",
+                       "hello0" => "s4",
                        "hello1" => "s6",
-                       "hello2" => "s2",
-                       "hello3" => "s5",
+                       "hello2" => "s3",
+                       "hello3" => "s6",
                        "hello4" => "s6",
                        "hello5" => "s4",
-                       "hello6" => "s5",
+                       "hello6" => "s3",
                        "hello7" => "s4",
-                       "hello8" => "s5",
-                       "hello9" => "s5",
+                       "hello8" => "s3",
+                       "hello9" => "s3",
                        "hello10" => "s3",
-                       "hello11" => "s6",
-                       "hello12" => "s1",
-                       "hello13" => "s3",
-                       "hello14" => "s3",
+                       "hello11" => "s5",
+                       "hello12" => "s4",
+                       "hello13" => "s5",
+                       "hello14" => "s2",
                        "hello15" => "s5",
-                       "hello16" => "s4",
-                       "hello17" => "s6",
-                       "hello18" => "s6",
-                       "hello19" => "s3"
+                       "hello16" => "s6",
+                       "hello17" => "s5",
+                       "hello18" => "s1",
+                       "hello19" => "s1",
+                       "hello20" => "s6",
+                       "hello21" => "s5",
+                       "hello22" => "s3",
+                       "hello23" => "s4",
+                       "hello24" => "s1"
                ];
-
                $this->assertEquals( $expectedLocations, $locations, 'Items placed at proper locations' );
 
                $locations = [];
@@ -48,12 +78,273 @@ class HashRingTest extends PHPUnit\Framework\TestCase {
                }
 
                $expectedLocations = [
-                       "hello0" => [ "s5", "s6" ],
-                       "hello1" => [ "s6", "s4" ],
-                       "hello2" => [ "s2", "s1" ],
-                       "hello3" => [ "s5", "s6" ],
-                       "hello4" => [ "s6", "s4" ],
+                       "hello0" => [ "s4", "s5" ],
+                       "hello1" => [ "s6", "s5" ],
+                       "hello2" => [ "s3", "s1" ],
+                       "hello3" => [ "s6", "s5" ],
+                       "hello4" => [ "s6", "s3" ],
                ];
                $this->assertEquals( $expectedLocations, $locations, 'Items placed at proper locations' );
        }
+
+       /**
+        * @dataProvider providor_getHashLocationWeights
+        */
+       public function testHashRingRatios( $locations, $expectedHits ) {
+               $ring = new HashRing( $locations, 'whirlpool' );
+
+               $locationStats = array_fill_keys( array_keys( $locations ), 0 );
+               for ( $i = 0; $i < 10000; ++$i ) {
+                       ++$locationStats[$ring->getLocation( "key-$i" )];
+               }
+               $this->assertEquals( $expectedHits, $locationStats );
+       }
+
+       public static function providor_getHashLocationWeights() {
+               return [
+                       [
+                               [ 'big' => 10, 'medium' => 5, 'small' => 1 ],
+                               [ 'big' => 6037, 'medium' => 3314, 'small' => 649 ]
+                       ]
+               ];
+       }
+
+       /**
+        * @dataProvider providor_getHashLocationWeights2
+        */
+       public function testHashRingRatios2( $locations, $expected ) {
+               $ring = new HashRing( $locations, 'sha1' );
+               $locationStats = array_fill_keys( array_keys( $locations ), 0 );
+               for ( $i = 0; $i < 1000; ++$i ) {
+                       foreach ( $ring->getLocations( "key-$i", 3 ) as $location ) {
+                               ++$locationStats[$location];
+                       }
+               }
+               $this->assertEquals( $expected, $locationStats );
+       }
+
+       public static function providor_getHashLocationWeights2() {
+               return [
+                       [
+                               [ 'big1' => 10, 'big2' => 10, 'big3' => 10, 'small1' => 1, 'small2' => 1 ],
+                               [ 'big1' => 929, 'big2' => 899, 'big3' => 887, 'small1' => 143, 'small2' => 142 ]
+                       ]
+               ];
+       }
+
+       public function testBigHashRingRatios() {
+               $locations = [];
+               for ( $i = 0; $i < 128; ++$i ) {
+                       $locations["server$i"] = 100;
+               }
+
+               $ring = new HashRing( $locations, 'md5' );
+
+               $scans = [];
+               for ( $i = 0; $i < 1000; ++$i ) {
+                       $ring->getLocation( "item$i" );
+                       $scans[] = $ring->getLastNodeScanSize();
+               }
+
+               $this->assertEquals( 1, min( $scans ) );
+               $this->assertEquals( 24, max( $scans ) );
+               // Note: log2( 140 * 128) = 14.129 (e.g. divide & conquer)
+               $this->assertEquals( 4.4, round( array_sum( $scans ) / count( $scans ), 1 ) );
+       }
+
+       public function testHashRingEjection() {
+               $map = [ 's1' => 5, 's2' => 5, 's3' => 10, 's4' => 10, 's5' => 5, 's6' => 5 ];
+               $ring = new HashRing( $map, 'md5' );
+
+               $ring->ejectFromLiveRing( 's3', 30 );
+               $ring->ejectFromLiveRing( 's6', 15 );
+
+               $this->assertEquals(
+                       [ 's1' => 5, 's2' => 5, 's4' => 10, 's5' => 5 ],
+                       $ring->getLiveLocationWeights(),
+                       'Live location weights'
+               );
+
+               for ( $i = 0; $i < 100; ++$i ) {
+                       $key = "key-$i";
+
+                       $this->assertNotEquals( 's3', $ring->getLiveLocation( $key ), 'ejected' );
+                       $this->assertNotEquals( 's6', $ring->getLiveLocation( $key ), 'ejected' );
+
+                       if ( !in_array( $ring->getLocation( $key ), [ 's3', 's6' ], true ) ) {
+                               $this->assertEquals(
+                                       $ring->getLocation( $key ),
+                                       $ring->getLiveLocation( $key ),
+                                       "Live ring otherwise matches (#$i)"
+                               );
+                               $this->assertEquals(
+                                       $ring->getLocations( $key, 1 ),
+                                       $ring->getLiveLocations( $key, 1 ),
+                                       "Live ring otherwise matches (#$i)"
+                               );
+                       }
+               }
+       }
+
+       public function testHashRingCollision() {
+               $ring1 = new HashRing( [ 0 => 1, 6497 => 1 ] );
+               $ring2 = new HashRing( [ 6497 => 1, 0 => 1 ] );
+
+               for ( $i = 0; $i < 100; ++$i ) {
+                       $this->assertEquals( $ring1->getLocation( $i ), $ring2->getLocation( $i ) );
+               }
+       }
+
+       public function testHashRingKetamaMode() {
+               // Same as https://github.com/RJ/ketama/blob/master/ketama.servers
+               $map = [
+                       '10.0.1.1:11211' => 600,
+                       '10.0.1.2:11211' => 300,
+                       '10.0.1.3:11211' => 200,
+                       '10.0.1.4:11211' => 350,
+                       '10.0.1.5:11211' => 1000,
+                       '10.0.1.6:11211' => 800,
+                       '10.0.1.7:11211' => 950,
+                       '10.0.1.8:11211' => 100
+               ];
+               $ring = new HashRing( $map, 'md5' );
+               $wrapper = \Wikimedia\TestingAccessWrapper::newFromObject( $ring );
+
+               $ketama_test = function ( $count ) use ( $wrapper ) {
+                       $baseRing = $wrapper->baseRing;
+
+                       $lines = [];
+                       for ( $key = 0; $key < $count; ++$key ) {
+                               $location = $wrapper->getLocation( $key );
+
+                               $itemPos = $wrapper->getItemPosition( $key );
+                               $guess = $wrapper->guessNodeIndexForPosition( $itemPos, $baseRing );
+                               $nodeIndex = $wrapper->findNodeIndexForPosition( $itemPos, $guess, $baseRing );
+                               $nodePos = $baseRing[$nodeIndex][HashRing::KEY_POS];
+
+                               $lines[] = sprintf( "%u %u %s\n", $itemPos, $nodePos, $location );
+                       }
+
+                       return "\n" . implode( '', $lines );
+               };
+
+               // Known correct values generated from C code:
+               // https://github.com/RJ/ketama/blob/master/libketama/ketama_test.c
+               $expected = <<<EOT
+
+2216742351 2217271743 10.0.1.1:11211
+943901380 949045552 10.0.1.5:11211
+2373066440 2374693370 10.0.1.6:11211
+2127088620 2130338203 10.0.1.6:11211
+2046197672 2051996197 10.0.1.7:11211
+2134629092 2135172435 10.0.1.1:11211
+470382870 472541453 10.0.1.7:11211
+1608782991 1609789509 10.0.1.3:11211
+2516119753 2520092206 10.0.1.2:11211
+3465331781 3466294492 10.0.1.4:11211
+1749342675 1753760600 10.0.1.5:11211
+1136464485 1137779711 10.0.1.1:11211
+3620997826 3621580689 10.0.1.7:11211
+283385029 285581365 10.0.1.6:11211
+2300818346 2302165654 10.0.1.5:11211
+2132603803 2134614475 10.0.1.8:11211
+2962705863 2969767984 10.0.1.2:11211
+786427760 786565633 10.0.1.5:11211
+4095887727 4096760944 10.0.1.6:11211
+2906459679 2906987515 10.0.1.6:11211
+137884056 138922607 10.0.1.4:11211
+81549628 82491298 10.0.1.6:11211
+3530020790 3530525869 10.0.1.6:11211
+4231817527 4234960467 10.0.1.7:11211
+2011099423 2014738083 10.0.1.7:11211
+107620750 120968799 10.0.1.6:11211
+3979113294 3981926993 10.0.1.4:11211
+273671938 276355738 10.0.1.4:11211
+4032816947 4033300359 10.0.1.5:11211
+464234862 466093615 10.0.1.1:11211
+3007059764 3007671127 10.0.1.5:11211
+542337729 542491760 10.0.1.7:11211
+4040385635 4044064727 10.0.1.5:11211
+3319802648 3320661601 10.0.1.7:11211
+1032153571 1035085391 10.0.1.1:11211
+3543939100 3545608820 10.0.1.5:11211
+3876899353 3885324049 10.0.1.2:11211
+3771318181 3773259708 10.0.1.8:11211
+3457906597 3459285639 10.0.1.5:11211
+3028975062 3031083168 10.0.1.7:11211
+244467158 250943416 10.0.1.5:11211
+1604785716 1609789509 10.0.1.3:11211
+3905343649 3905751132 10.0.1.1:11211
+1713497623 1725056963 10.0.1.5:11211
+1668356087 1668827816 10.0.1.5:11211
+3427369836 3438933308 10.0.1.1:11211
+2515850457 2520092206 10.0.1.2:11211
+3886138983 3887390208 10.0.1.1:11211
+4019334756 4023153300 10.0.1.8:11211
+1170561012 1170785765 10.0.1.7:11211
+1841809344 1848425105 10.0.1.6:11211
+973223976 973369204 10.0.1.1:11211
+358093210 359562433 10.0.1.6:11211
+378350808 380841931 10.0.1.5:11211
+4008477862 4012085095 10.0.1.7:11211
+1027226549 1028630030 10.0.1.6:11211
+2386583967 2387706118 10.0.1.1:11211
+522892146 524831677 10.0.1.7:11211
+3779194982 3788912803 10.0.1.5:11211
+3764731657 3771312500 10.0.1.7:11211
+184756999 187529415 10.0.1.6:11211
+838351231 845886003 10.0.1.3:11211
+2827220548 2828019973 10.0.1.6:11211
+3604721411 3607668249 10.0.1.6:11211
+472866282 475506254 10.0.1.5:11211
+2752268796 2754833471 10.0.1.5:11211
+1791464754 1795042583 10.0.1.7:11211
+3029359475 3031083168 10.0.1.7:11211
+3633378211 3639985542 10.0.1.6:11211
+3148267284 3149217023 10.0.1.6:11211
+163887996 166705043 10.0.1.7:11211
+3642803426 3649125922 10.0.1.7:11211
+3901799218 3902199881 10.0.1.7:11211
+418045394 425867331 10.0.1.6:11211
+346775981 348578169 10.0.1.6:11211
+368352208 372224616 10.0.1.7:11211
+2643711995 2644259911 10.0.1.5:11211
+2032983336 2033860601 10.0.1.6:11211
+3567842357 3572867530 10.0.1.2:11211
+1024982737 1028630030 10.0.1.6:11211
+933966832 938106828 10.0.1.7:11211
+2102520899 2103402846 10.0.1.7:11211
+3537205399 3538094881 10.0.1.7:11211
+2311233534 2314593262 10.0.1.1:11211
+2500514664 2503565236 10.0.1.7:11211
+1091958846 1093484995 10.0.1.6:11211
+3984972691 3987453644 10.0.1.1:11211
+2669994439 2670911201 10.0.1.4:11211
+2846111786 2846115813 10.0.1.5:11211
+1805010806 1808593732 10.0.1.8:11211
+1587024774 1587746378 10.0.1.5:11211
+3214549588 3215619351 10.0.1.2:11211
+1965214866 1970922428 10.0.1.7:11211
+1038671000 1040777775 10.0.1.7:11211
+820820468 823114475 10.0.1.6:11211
+2722835329 2723166435 10.0.1.5:11211
+1602053414 1604196066 10.0.1.5:11211
+1330835426 1335097278 10.0.1.5:11211
+556547565 557075710 10.0.1.4:11211
+2977587884 2978402952 10.0.1.1:11211
+
+EOT;
+
+               $this->assertEquals( $expected, $ketama_test( 100 ), 'Ketama mode (diff check)' );
+
+               // Hash of known correct values from C code
+               $this->assertEquals(
+                       'c69ac9eb7a8a630c0cded201cefeaace',
+                       md5( $ketama_test( 1e5 ) ),
+                       'Ketama mode (large, MD5 check)'
+               );
+
+               // Slower, full upstream MD5 check, manually verified 3/21/2018
+               // $this->assertEquals( '5672b131391f5aa2b280936aec1eea74', md5( $ketama_test( 1e6 ) ) );
+       }
 }