Use consistent hashing for SqlBagOStuff servers
authorTim Starling <tstarling@wikimedia.org>
Fri, 4 Jan 2013 09:23:26 +0000 (20:23 +1100)
committerGerrit Code Review <gerrit@wikimedia.org>
Fri, 4 Jan 2013 18:07:07 +0000 (18:07 +0000)
Also factor out consistent hash code from Redis and the new application
into a class called ArrayUtils. The name "ArrayUtils" is from
I0f4e7d7c, I expect that change will be rebased on top of this one at
some point.

Change-Id: I9375087f4d7a6e8e629d97bfb6b117d9cb7d1bcf

includes/ArrayUtils.php [new file with mode: 0644]
includes/AutoLoader.php
includes/objectcache/RedisBagOStuff.php
includes/objectcache/SqlBagOStuff.php

diff --git a/includes/ArrayUtils.php b/includes/ArrayUtils.php
new file mode 100644 (file)
index 0000000..4ff31b8
--- /dev/null
@@ -0,0 +1,34 @@
+<?php
+
+class ArrayUtils {
+       /**
+        * Sort the given array in a pseudo-random order which depends only on the
+        * given key and each element value. This is typically used for load
+        * balancing between servers each with a local cache.
+        *
+        * Keys are preserved. The input array is modified in place.
+        *
+        * Note: Benchmarking on PHP 5.3 and 5.4 indicates that for small
+        * strings, md5() is only 10% slower than hash('joaat',...) etc.,
+        * since the function call overhead dominates. So there's not much
+        * justification for breaking compatibility with installations
+        * compiled with ./configure --disable-hash.
+        * 
+        * @param $array The array to sort
+        * @param $key The string key
+        * @param $separator A separator used to delimit the array elements and the
+        *     key. This can be chosen to provide backwards compatibility with 
+        *     various consistent hash implementations that existed before this
+        *     function was introduced.
+        */
+       static function consistentHashSort( &$array, $key, $separator = "\000" ) {
+               $hashes = array();
+               foreach ( $array as $elt ) {
+                       $hashes[$elt] = md5( $elt . $separator . $key );
+               }
+               uasort( $array, function ( $a, $b ) use ( $hashes ) {
+                       return strcmp( $hashes[$a], $hashes[$b] );
+               } );
+       }
+}
+
index ecfe208..fd34590 100644 (file)
@@ -33,6 +33,7 @@ $wgAutoloadLocalClasses = array(
        'AjaxDispatcher' => 'includes/AjaxDispatcher.php',
        'AjaxResponse' => 'includes/AjaxResponse.php',
        'AlphabeticPager' => 'includes/Pager.php',
+       'ArrayUtils' => 'includes/ArrayUtils.php',
        'Article' => 'includes/Article.php',
        'AtomFeed' => 'includes/Feed.php',
        'AuthPlugin' => 'includes/AuthPlugin.php',
index 40784f5..4715859 100644 (file)
@@ -288,23 +288,10 @@ class RedisBagOStuff extends BagOStuff {
                if ( count( $this->servers ) === 1 ) {
                        $candidates = $this->servers;
                } else {
-                       // Use consistent hashing
-                       //
-                       // Note: Benchmarking on PHP 5.3 and 5.4 indicates that for small
-                       // strings, md5() is only 10% slower than hash('joaat',...) etc.,
-                       // since the function call overhead dominates. So there's not much
-                       // justification for breaking compatibility with installations
-                       // compiled with ./configure --disable-hash.
-                       $hashes = array();
-                       foreach ( $this->servers as $server ) {
-                               $hashes[$server] = md5( $server . '/' . $key );
-                       }
-                       asort( $hashes );
+                       $candidates = $this->servers;
+                       ArrayUtils::consistentHashSort( $candidates, $key, '/' );
                        if ( !$this->automaticFailover ) {
-                               reset( $hashes );
-                               $candidates = array( key( $hashes ) );
-                       } else {
-                               $candidates = array_keys( $hashes );
+                               $candidates = array_slice( $candidates, 0, 1 );
                        }
                }
 
index 653fb15..eccfe00 100644 (file)
@@ -33,6 +33,7 @@ class SqlBagOStuff extends BagOStuff {
        var $lb;
 
        var $serverInfos;
+       var $serverNames;
        var $numServers;
        var $conns;
        var $lastExpireAll = 0;
@@ -75,6 +76,10 @@ class SqlBagOStuff extends BagOStuff {
                if ( isset( $params['servers'] ) ) {
                        $this->serverInfos = $params['servers'];
                        $this->numServers = count( $this->serverInfos );
+                       $this->serverNames = array();
+                       foreach ( $this->serverInfos as $i => $info ) {
+                               $this->serverNames[$i] = isset( $info['host'] ) ? $info['host'] : "#$i";
+                       }
                } elseif ( isset( $params['server'] ) ) {
                        $this->serverInfos = array( $params['server'] );
                        $this->numServers = count( $this->serverInfos );
@@ -154,16 +159,21 @@ class SqlBagOStuff extends BagOStuff {
         * @return Array: server index and table name
         */
        protected function getTableByKey( $key ) {
-               $numTables = $this->shards * $this->numServers ;
-               if ( $numTables > 1 ) {
-                       $hash = ( hexdec( substr( md5( $key ), 0, 8 ) ) & 0x7fffffff ) % $numTables;
+               if ( $this->shards > 1 ) {
+                       $hash = hexdec( substr( md5( $key ), 0, 8 ) ) & 0x7fffffff;
                        $tableIndex = $hash % $this->shards;
-                       $serverIndex = intval( round( ( $hash - $tableIndex ) / $this->shards ) );
-                       $tableName = $this->getTableNameByShard( $tableIndex );
-                       return array( $serverIndex, $tableName );
                } else {
-                       return array( 0, $this->tableName );
+                       $tableIndex = 0;
+               }
+               if ( $this->numServers > 1 ) {
+                       $sortedServers = $this->serverNames;
+                       ArrayUtils::consistentHashSort( $sortedServers, $key );
+                       reset( $sortedServers );
+                       $serverIndex = key( $sortedServers );
+               } else {
+                       $serverIndex = 0;
                }
+               return array( $serverIndex, $this->getTableNameByShard( $tableIndex ) );
        }
 
        /**