filebackend: clean up some comments and remove unused FileBackendStoreOpHandle field
[lhc/web/wiklou.git] / includes / libs / HashRing.php
index 3b9c24d..94413c2 100644 (file)
 /**
  * Convenience class for weighted consistent hash rings
  *
+ * This deterministically maps "keys" to a set of "locations" while avoiding clumping
+ *
+ * Each location is represented by a number of nodes on a ring proportionate to the ratio
+ * of its weight compared to the total location weight. Note positions are deterministically
+ * derived from the hash of the location name. Nodes are responsible for the portion of the
+ * ring, counter-clockwise, up until the next node. Locations are responsible for all portions
+ * of the ring that the location's nodes are responsible for.
+ *
+ * A location that is temporarily "ejected" is said to be absent from the "live" ring.
+ * If no location ejections are active, then the base ring and live ring are identical.
+ *
  * @since 1.22
  */
-class HashRing {
-       /** @var array (location => weight) */
-       protected $sourceMap = [];
-       /** @var array (location => (start, end)) */
-       protected $ring = [];
+class HashRing implements Serializable {
+       /** @var string Hashing algorithm for hash() */
+       protected $algo;
+       /** @var int[] Non-empty (location => integer weight) */
+       protected $weightByLocation;
+       /** @var int[] Map of (location => UNIX timestamp) */
+       protected $ejectExpiryByLocation;
 
-       /** @var HashRing|null */
+       /** @var array[] Non-empty position-ordered list of (position, location name) */
+       protected $baseRing;
+       /** @var array[] Non-empty position-ordered list of (position, location name) */
        protected $liveRing;
-       /** @var array (location => UNIX timestamp) */
-       protected $ejectionExpiries = [];
-       /** @var int UNIX timestamp */
-       protected $ejectionNextExpiry = INF;
 
-       const RING_SIZE = 268435456; // 2^28
+       /** @var float Number of positions on the ring */
+       const RING_SIZE = 4294967296.0; // 2^32
+       /** @var integer Overall number of node groups per server */
+       const HASHES_PER_LOCATION = 40;
+       /** @var integer Number of nodes in a node group */
+       const SECTORS_PER_HASH = 4;
+
+       const KEY_POS = 0;
+       const KEY_LOCATION = 1;
+
+       /** @var int Consider all locations */
+       const RING_ALL = 0;
+       /** @var int Only consider "live" locations */
+       const RING_LIVE = 1;
 
        /**
-        * @param array $map (location => weight)
+        * Make a consistent hash ring given a set of locations and their weight values
+        *
+        * @param int[] $map Map of (location => weight)
+        * @param string $algo Hashing algorithm listed in hash_algos() [optional]
+        * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries
+        * @since 1.31
         */
-       public function __construct( array $map ) {
-               $map = array_filter( $map, function ( $w ) {
-                       return $w > 0;
-               } );
-               if ( !count( $map ) ) {
-                       throw new UnexpectedValueException( "Ring is empty or all weights are zero." );
-               }
-               $this->sourceMap = $map;
-               // Sort the locations based on the hash of their names
-               $hashes = [];
-               foreach ( $map as $location => $weight ) {
-                       $hashes[$location] = sha1( $location );
-               }
-               uksort( $map, function ( $a, $b ) use ( $hashes ) {
-                       return strcmp( $hashes[$a], $hashes[$b] );
-               } );
-               // Fit the map to weight-proportionate one with a space of size RING_SIZE
-               $sum = array_sum( $map );
-               $standardMap = [];
-               foreach ( $map as $location => $weight ) {
-                       $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE );
+       public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) {
+               $this->init( $map, $algo, $ejections );
+       }
+
+       /**
+        * @param int[] $map Map of (location => integer)
+        * @param string $algo Hashing algorithm
+        * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires
+        */
+       protected function init( array $map, $algo, array $ejections ) {
+               if ( !in_array( $algo, hash_algos(), true ) ) {
+                       throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." );
                }
-               // Build a ring of RING_SIZE spots, with each location at a spot in location hash order
-               $index = 0;
-               foreach ( $standardMap as $location => $weight ) {
-                       // Location covers half-closed interval [$index,$index + $weight)
-                       $this->ring[$location] = [ $index, $index + $weight ];
-                       $index += $weight;
+
+               $weightByLocation = array_filter( $map );
+               if ( $weightByLocation === [] ) {
+                       throw new UnexpectedValueException( "No locations with non-zero weight." );
+               } elseif ( min( $map ) < 0 ) {
+                       throw new InvalidArgumentException( "Location weight cannot be negative." );
                }
-               // Make sure the last location covers what is left
-               end( $this->ring );
-               $this->ring[key( $this->ring )][1] = self::RING_SIZE;
+
+               $this->algo = $algo;
+               $this->weightByLocation = $weightByLocation;
+               $this->ejectExpiryByLocation = $ejections;
+               $this->baseRing = $this->buildLocationRing( $this->weightByLocation );
        }
 
        /**
@@ -82,58 +104,111 @@ class HashRing {
         *
         * @param string $item
         * @return string Location
+        * @throws UnexpectedValueException
         */
        final public function getLocation( $item ) {
-               $locations = $this->getLocations( $item, 1 );
-
-               return $locations[0];
+               return $this->getLocations( $item, 1 )[0];
        }
 
        /**
-        * Get the location of an item on the ring, as well as the next locations
+        * Get the location of an item on the ring followed by the next ring locations
         *
         * @param string $item
         * @param int $limit Maximum number of locations to return
-        * @return array List of locations
+        * @param int $from One of the RING_* class constants
+        * @return string[] List of locations
+        * @throws InvalidArgumentException
+        * @throws UnexpectedValueException
         */
-       public function getLocations( $item, $limit ) {
+       public function getLocations( $item, $limit, $from = self::RING_ALL ) {
+               if ( $from === self::RING_ALL ) {
+                       $ring = $this->baseRing;
+               } elseif ( $from === self::RING_LIVE ) {
+                       $ring = $this->getLiveRing();
+               } else {
+                       throw new InvalidArgumentException( "Invalid ring source specified." );
+               }
+
+               // Short-circuit for the common single-location case. Note that if there was only one
+               // location and it was ejected from the live ring, getLiveRing() would have error out.
+               if ( count( $this->weightByLocation ) == 1 ) {
+                       return ( $limit > 0 ) ? [ $ring[0][self::KEY_LOCATION] ] : [];
+               }
+
+               // Locate the node index for this item's position on the hash ring
+               $itemIndex = $this->findNodeIndexForPosition( $this->getItemPosition( $item ), $ring );
+
                $locations = [];
-               $primaryLocation = null;
-               $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits
-               foreach ( $this->ring as $location => $range ) {
-                       if ( count( $locations ) >= $limit ) {
-                               break;
-                       }
-                       // The $primaryLocation is the location the item spot is in.
-                       // After that is reached, keep appending the next locations.
-                       if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) {
-                               if ( $primaryLocation === null ) {
-                                       $primaryLocation = $location;
+               $currentIndex = null;
+               while ( count( $locations ) < $limit ) {
+                       if ( $currentIndex === null ) {
+                               $currentIndex = $itemIndex;
+                       } else {
+                               $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring );
+                               if ( $currentIndex === $itemIndex ) {
+                                       break; // all nodes visited
                                }
-                               $locations[] = $location;
                        }
-               }
-               // If more locations are requested, wrap-around and keep adding them
-               reset( $this->ring );
-               while ( count( $locations ) < $limit ) {
-                       $location = key( $this->ring );
-                       if ( $location === $primaryLocation ) {
-                               break; // don't go in circles
+                       $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION];
+                       if ( !in_array( $nodeLocation, $locations, true ) ) {
+                               // Ignore other nodes for the same locations already added
+                               $locations[] = $nodeLocation;
                        }
-                       $locations[] = $location;
-                       next( $this->ring );
                }
 
                return $locations;
        }
 
        /**
-        * Get the map of locations to weight (ignores 0-weight items)
+        * @param float $position
+        * @param array[] $ring Either the base or live ring
+        * @return int|null
+        */
+       private function findNodeIndexForPosition( $position, $ring ) {
+               $count = count( $ring );
+               if ( $count === 0 ) {
+                       return null;
+               }
+
+               $index = null;
+               $lowPos = 0;
+               $highPos = $count;
+               while ( true ) {
+                       $midPos = (int)( ( $lowPos + $highPos ) / 2 );
+                       if ( $midPos === $count ) {
+                               $index = 0;
+                               break;
+                       }
+
+                       $midVal = $ring[$midPos][self::KEY_POS];
+                       $midMinusOneVal = ( $midPos === 0 ) ? 0 : $ring[$midPos - 1][self::KEY_POS];
+                       if ( $position <= $midVal && $position > $midMinusOneVal ) {
+                               $index = $midPos;
+                               break;
+                       }
+
+                       if ( $midVal < $position ) {
+                               $lowPos = $midPos + 1;
+                       } else {
+                               $highPos = $midPos - 1;
+                       }
+
+                       if ( $lowPos > $highPos ) {
+                               $index = 0;
+                               break;
+                       }
+               }
+
+               return $index;
+       }
+
+       /**
+        * Get the map of locations to weight (does not include zero weight items)
         *
-        * @return array
+        * @return int[]
         */
        public function getLocationWeights() {
-               return $this->sourceMap;
+               return $this->weightByLocation;
        }
 
        /**
@@ -142,53 +217,19 @@ class HashRing {
         * @param string $location
         * @param int $ttl Seconds
         * @return bool Whether some non-ejected locations are left
+        * @throws UnexpectedValueException
         */
        public function ejectFromLiveRing( $location, $ttl ) {
-               if ( !isset( $this->sourceMap[$location] ) ) {
+               if ( !isset( $this->weightByLocation[$location] ) ) {
                        throw new UnexpectedValueException( "No location '$location' in the ring." );
                }
-               $expiry = time() + $ttl;
-               $this->liveRing = null; // stale
-               $this->ejectionExpiries[$location] = $expiry;
-               $this->ejectionNextExpiry = min( $expiry, $this->ejectionNextExpiry );
 
-               return ( count( $this->ejectionExpiries ) < count( $this->sourceMap ) );
-       }
+               $expiry = $this->getCurrentTime() + $ttl;
+               $this->ejectExpiryByLocation[$location] = $expiry;
 
-       /**
-        * Get the "live" hash ring (which does not include ejected locations)
-        *
-        * @return HashRing
-        * @throws UnexpectedValueException
-        */
-       protected function getLiveRing() {
-               $now = time();
-               if ( $this->liveRing === null || $this->ejectionNextExpiry <= $now ) {
-                       $this->ejectionExpiries = array_filter(
-                               $this->ejectionExpiries,
-                               function ( $expiry ) use ( $now ) {
-                                       return ( $expiry > $now );
-                               }
-                       );
-                       if ( count( $this->ejectionExpiries ) ) {
-                               $map = array_diff_key( $this->sourceMap, $this->ejectionExpiries );
-                               $this->liveRing = count( $map ) ? new self( $map ) : false;
-
-                               $this->ejectionNextExpiry = min( $this->ejectionExpiries );
-                       } else { // common case; avoid recalculating ring
-                               $this->liveRing = clone $this;
-                               $this->liveRing->ejectionExpiries = [];
-                               $this->liveRing->ejectionNextExpiry = INF;
-                               $this->liveRing->liveRing = null;
-
-                               $this->ejectionNextExpiry = INF;
-                       }
-               }
-               if ( !$this->liveRing ) {
-                       throw new UnexpectedValueException( "The live ring is currently empty." );
-               }
+               $this->liveRing = null; // invalidate ring cache
 
-               return $this->liveRing;
+               return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) );
        }
 
        /**
@@ -198,8 +239,8 @@ class HashRing {
         * @return string Location
         * @throws UnexpectedValueException
         */
-       public function getLiveLocation( $item ) {
-               return $this->getLiveRing()->getLocation( $item );
+       final public function getLiveLocation( $item ) {
+               return $this->getLocations( $item, 1, self::RING_LIVE )[0];
        }
 
        /**
@@ -207,20 +248,206 @@ class HashRing {
         *
         * @param string $item
         * @param int $limit Maximum number of locations to return
-        * @return array List of locations
+        * @return string[] List of locations
         * @throws UnexpectedValueException
         */
-       public function getLiveLocations( $item, $limit ) {
-               return $this->getLiveRing()->getLocations( $item, $limit );
+       final public function getLiveLocations( $item, $limit ) {
+               return $this->getLocations( $item, $limit, self::RING_LIVE );
        }
 
        /**
-        * Get the map of "live" locations to weight (ignores 0-weight items)
+        * Get the map of "live" locations to weight (does not include zero weight items)
         *
-        * @return array
+        * @return int[]
         * @throws UnexpectedValueException
         */
        public function getLiveLocationWeights() {
-               return $this->getLiveRing()->getLocationWeights();
+               $now = $this->getCurrentTime();
+
+               return array_diff_key(
+                       $this->weightByLocation,
+                       array_filter(
+                               $this->ejectExpiryByLocation,
+                               function ( $expiry ) use ( $now ) {
+                                       return ( $expiry > $now );
+                               }
+                       )
+               );
+       }
+
+       /**
+        * @param int[] $weightByLocation
+        * @return array[]
+        */
+       private function buildLocationRing( array $weightByLocation ) {
+               $locationCount = count( $weightByLocation );
+               $totalWeight = array_sum( $weightByLocation );
+
+               $ring = [];
+               // Assign nodes to all locations based on location weight
+               $claimed = []; // (position as string => (node, index))
+               foreach ( $weightByLocation as $location => $weight ) {
+                       $ratio = $weight / $totalWeight;
+                       // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available;
+                       // assign a few groups of nodes to this location based on its weight.
+                       $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount );
+                       for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) {
+                               // For efficiency, get 4 points per hash call and 4X node count.
+                               // If $algo is MD5, then this matches that of with libketama.
+                               // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
+                               $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" );
+                               foreach ( $positions as $gi => $position ) {
+                                       $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location";
+                                       $posKey = (string)$position; // large integer
+                                       if ( isset( $claimed[$posKey] ) ) {
+                                               // Disallow duplicates for sanity (name decides precedence)
+                                               if ( $claimed[$posKey]['node'] > $node ) {
+                                                       continue;
+                                               } else {
+                                                       unset( $ring[$claimed[$posKey]['index']] );
+                                               }
+                                       }
+                                       $ring[] = [
+                                               self::KEY_POS => $position,
+                                               self::KEY_LOCATION => $location
+                                       ];
+                                       $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ];
+                               }
+                       }
+               }
+               // Sort the locations into clockwise order based on the hash ring position
+               usort( $ring, function ( $a, $b ) {
+                       if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) {
+                               throw new UnexpectedValueException( 'Duplicate node positions.' );
+                       }
+
+                       return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 );
+               } );
+
+               return $ring;
+       }
+
+       /**
+        * @param string $item Key
+        * @return float Ring position; integral number in [0, self::RING_SIZE - 1]
+        */
+       private function getItemPosition( $item ) {
+               // If $algo is MD5, then this matches that of with libketama.
+               // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
+               $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 );
+               if ( strlen( $octets ) != 4 ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." );
+               }
+
+               $pos = unpack( 'V', $octets )[1];
+               if ( $pos < 0 ) {
+                       // Most-significant-bit is set, causing unpack() to return a negative integer due
+                       // to the fact that it returns a signed int. Cast it to an unsigned integer string.
+                       $pos = sprintf( '%u', $pos );
+               }
+
+               return (float)$pos;
+       }
+
+       /**
+        * @param string $nodeGroupName
+        * @return float[] Four ring positions on [0, self::RING_SIZE - 1]
+        */
+       private function getNodePositionQuartet( $nodeGroupName ) {
+               $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 );
+               if ( strlen( $octets ) != 16 ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." );
+               }
+
+               $positions = [];
+               foreach ( unpack( 'V4', $octets ) as $signed ) {
+                       $positions[] = (float)sprintf( '%u', $signed );
+               }
+
+               return $positions;
+       }
+
+       /**
+        * @param int $i Valid index for a node in the ring
+        * @param array[] $ring Either the base or live ring
+        * @return int Valid index for a node in the ring
+        */
+       private function getNextClockwiseNodeIndex( $i, $ring ) {
+               if ( !isset( $ring[$i] ) ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
+               }
+
+               $next = $i + 1;
+
+               return ( $next < count( $ring ) ) ? $next : 0;
+       }
+
+       /**
+        * Get the "live" hash ring (which does not include ejected locations)
+        *
+        * @return array[]
+        * @throws UnexpectedValueException
+        */
+       protected function getLiveRing() {
+               if ( !$this->ejectExpiryByLocation ) {
+                       return $this->baseRing; // nothing ejected
+               }
+
+               $now = $this->getCurrentTime();
+
+               if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) {
+                       // Live ring needs to be regerenated...
+                       $this->ejectExpiryByLocation = array_filter(
+                               $this->ejectExpiryByLocation,
+                               function ( $expiry ) use ( $now ) {
+                                       return ( $expiry > $now );
+                               }
+                       );
+
+                       if ( count( $this->ejectExpiryByLocation ) ) {
+                               // Some locations are still ejected from the ring
+                               $liveRing = [];
+                               foreach ( $this->baseRing as $i => $nodeInfo ) {
+                                       $location = $nodeInfo[self::KEY_LOCATION];
+                                       if ( !isset( $this->ejectExpiryByLocation[$location] ) ) {
+                                               $liveRing[] = $nodeInfo;
+                                       }
+                               }
+                       } else {
+                               $liveRing = $this->baseRing;
+                       }
+
+                       $this->liveRing = $liveRing;
+               }
+
+               if ( !$this->liveRing ) {
+                       throw new UnexpectedValueException( "The live ring is currently empty." );
+               }
+
+               return $this->liveRing;
+       }
+
+       /**
+        * @return int UNIX timestamp
+        */
+       protected function getCurrentTime() {
+               return time();
+       }
+
+       public function serialize() {
+               return serialize( [
+                       'algorithm' => $this->algo,
+                       'locations' => $this->weightByLocation,
+                       'ejections' => $this->ejectExpiryByLocation
+               ] );
+       }
+
+       public function unserialize( $serialized ) {
+               $data = unserialize( $serialized );
+               if ( is_array( $data ) ) {
+                       $this->init( $data['locations'], $data['algorithm'], $data['ejections'] );
+               } else {
+                       throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." );
+               }
        }
 }