From 4aecdad6477e7ddf2eee1a35a17ab9d9141c3ff9 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Thu, 7 Jun 2018 15:26:47 +1000 Subject: [PATCH] HashRing: use bisection instead of guess and refine Use bisection to find the position within the ring, as in ketama. According to my benchmarking this is faster than the guess/refine algorithm for rings with 100 locations, i.e. 16000 ring nodes requiring at most 14 bisection iterations. Change-Id: I6042f382de093081971b5ec210eda8dfa74988f2 --- includes/libs/HashRing.php | 136 +++++-------------- tests/phpunit/includes/libs/HashRingTest.php | 25 +--- 2 files changed, 38 insertions(+), 123 deletions(-) diff --git a/includes/libs/HashRing.php b/includes/libs/HashRing.php index 501cbcdb59..251fa88bae 100644 --- a/includes/libs/HashRing.php +++ b/includes/libs/HashRing.php @@ -49,9 +49,6 @@ class HashRing implements Serializable { /** @var array[] Non-empty list of (float, node name, location name) */ protected $liveRing; - /** @var int|null Number of nodes scanned to place an item last time */ - private $lastNodeScanSize; - /** @var float Number of positions on the ring */ const RING_SIZE = 4294967296.0; // 2^32 /** @var integer Overall number of node groups per server */ @@ -133,16 +130,7 @@ class HashRing implements Serializable { // Locate this item's position on the hash ring $position = $this->getItemPosition( $item ); - - // Guess a nearby node based on the node list being ordered and the probabilistic - // expected size of nodes being equal, varying less when with higher node counts - $guessIndex = $this->guessNodeIndexForPosition( $position, $ring ); - - // Find the index of the node within which this item resides - $itemNodeIndex = $this->findNodeIndexForPosition( $position, $guessIndex, $ring ); - if ( $itemNodeIndex === null ) { - throw new RuntimeException( __METHOD__ . ": no place for '$item' ($position)" ); - } + $itemNodeIndex = $this->findNodeIndexForPosition( $position, $ring ); $locations = []; $currentIndex = $itemNodeIndex; @@ -161,6 +149,42 @@ class HashRing implements Serializable { return $locations; } + /** + * @param float $position + * @param array[] $ring Either the base or live ring + * @return int|null + */ + private function findNodeIndexForPosition( $position, $ring ) { + $count = count( $ring ); + if ( $count === 0 ) { + return null; + } + $lowPos = 0; + $highPos = $count; + while ( true ) { + $midPos = intval( ( $lowPos + $highPos ) / 2 ); + if ( $midPos === $count ) { + return 0; + } + $midVal = $ring[$midPos][self::KEY_POS]; + $midMinusOneVal = $midPos === 0 ? 0 : $ring[$midPos - 1][self::KEY_POS]; + + if ( $position <= $midVal && $position > $midMinusOneVal ) { + return $midPos; + } + + if ( $midVal < $position ) { + $lowPos = $midPos + 1; + } else { + $highPos = $midPos - 1; + } + + if ( $lowPos > $highPos ) { + return 0; + } + } + } + /** * Get the map of locations to weight (does not include zero weight items) * @@ -234,70 +258,6 @@ class HashRing implements Serializable { ); } - /** - * @param float $position - * @param array[] $ring Either the base or live ring - * @return int - */ - private function guessNodeIndexForPosition( $position, $ring ) { - $arcRatio = $position / self::RING_SIZE; // range is [0.0, 1.0) - $maxIndex = count( $ring ) - 1; - $guessIndex = intval( $maxIndex * $arcRatio ); - - $displacement = $ring[$guessIndex][self::KEY_POS] - $position; - $aveSize = self::RING_SIZE / count( $ring ); - $shift = intval( $displacement / $aveSize ); - - $guessIndex -= $shift; - if ( $guessIndex < 0 ) { - $guessIndex = max( $maxIndex + $guessIndex, 0 ); // roll-over - } elseif ( $guessIndex > $maxIndex ) { - $guessIndex = min( $guessIndex - $maxIndex, 0 ); // roll-over - } - - return $guessIndex; - } - - /** - * @param float $position - * @param int $guessIndex Node index to start scanning - * @param array[] $ring Either the base or live ring - * @return int|null - */ - private function findNodeIndexForPosition( $position, $guessIndex, $ring ) { - $mainNodeIndex = null; // first matching node index - - $this->lastNodeScanSize = 0; - - if ( $ring[$guessIndex][self::KEY_POS] >= $position ) { - // Walk the nodes counter-clockwise until reaching a node at/before $position - do { - $priorIndex = $guessIndex; - $guessIndex = $this->getPrevClockwiseNodeIndex( $guessIndex, $ring ); - $nodePosition = $ring[$guessIndex][self::KEY_POS]; - if ( $nodePosition < $position || $guessIndex > $priorIndex ) { - $mainNodeIndex = $priorIndex; // includes roll-over case - } elseif ( $nodePosition === $position ) { - $mainNodeIndex = $guessIndex; - } - ++$this->lastNodeScanSize; - } while ( $mainNodeIndex === null ); - } else { - // Walk the nodes clockwise until reaching a node at/after $position - do { - $priorIndex = $guessIndex; - $guessIndex = $this->getNextClockwiseNodeIndex( $guessIndex, $ring ); - $nodePosition = $ring[$guessIndex][self::KEY_POS]; - if ( $nodePosition >= $position || $guessIndex < $priorIndex ) { - $mainNodeIndex = $guessIndex; // includes roll-over case - } - ++$this->lastNodeScanSize; - } while ( $mainNodeIndex === null ); - } - - return $mainNodeIndex; - } - /** * @param int[] $weightByLocation * @param string $algo Hashing algorithm @@ -399,21 +359,6 @@ class HashRing implements Serializable { return ( $next < count( $ring ) ) ? $next : 0; } - /** - * @param int $i Valid index for a node in the ring - * @param array[] $ring Either the base or live ring - * @return int Valid index for a node in the ring - */ - private function getPrevClockwiseNodeIndex( $i, $ring ) { - if ( !isset( $ring[$i] ) ) { - throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." ); - } - - $prev = $i - 1; - - return ( $prev >= 0 ) ? $prev : count( $ring ) - 1; - } - /** * Get the "live" hash ring (which does not include ejected locations) * @@ -466,13 +411,6 @@ class HashRing implements Serializable { return time(); } - /** - * @return int|null - */ - public function getLastNodeScanSize() { - return $this->lastNodeScanSize; - } - public function serialize() { return serialize( [ 'algorithm' => $this->algo, diff --git a/tests/phpunit/includes/libs/HashRingTest.php b/tests/phpunit/includes/libs/HashRingTest.php index 1e51aa8067..acaeb02558 100644 --- a/tests/phpunit/includes/libs/HashRingTest.php +++ b/tests/phpunit/includes/libs/HashRingTest.php @@ -37,8 +37,6 @@ class HashRingTest extends PHPUnit\Framework\TestCase { 'Normalized location weights' ); - $this->assertEquals( null, $ring->getLastNodeScanSize() ); - $locations = []; for ( $i = 0; $i < 25; $i++ ) { $locations[ "hello$i"] = $ring->getLocation( "hello$i" ); @@ -132,26 +130,6 @@ class HashRingTest extends PHPUnit\Framework\TestCase { ]; } - public function testBigHashRingRatios() { - $locations = []; - for ( $i = 0; $i < 128; ++$i ) { - $locations["server$i"] = 100; - } - - $ring = new HashRing( $locations, 'md5' ); - - $scans = []; - for ( $i = 0; $i < 1000; ++$i ) { - $ring->getLocation( "item$i" ); - $scans[] = $ring->getLastNodeScanSize(); - } - - $this->assertEquals( 1, min( $scans ) ); - $this->assertEquals( 24, max( $scans ) ); - // Note: log2( 140 * 128) = 14.129 (e.g. divide & conquer) - $this->assertEquals( 4.4, round( array_sum( $scans ) / count( $scans ), 1 ) ); - } - public function testHashRingEjection() { $map = [ 's1' => 5, 's2' => 5, 's3' => 10, 's4' => 10, 's5' => 5, 's6' => 5 ]; $ring = new HashRing( $map, 'md5' ); @@ -218,8 +196,7 @@ class HashRingTest extends PHPUnit\Framework\TestCase { $location = $wrapper->getLocation( $key ); $itemPos = $wrapper->getItemPosition( $key ); - $guess = $wrapper->guessNodeIndexForPosition( $itemPos, $baseRing ); - $nodeIndex = $wrapper->findNodeIndexForPosition( $itemPos, $guess, $baseRing ); + $nodeIndex = $wrapper->findNodeIndexForPosition( $itemPos, $baseRing ); $nodePos = $baseRing[$nodeIndex][HashRing::KEY_POS]; $lines[] = sprintf( "%u %u %s\n", $itemPos, $nodePos, $location ); -- 2.20.1