new load balancing algorithm
author Tim Starling <tstarling@users.mediawiki.org>
Wed, 1 Jun 2005 06:18:49 +0000 (06:18 +0000)
committer Tim Starling <tstarling@users.mediawiki.org>
Wed, 1 Jun 2005 06:18:49 +0000 (06:18 +0000)
includes/Database.php
includes/GlobalFunctions.php
includes/LoadBalancer.php
includes/OutputPage.php
languages/Language.php

index 0dce6a9..9355231 100644 (file)
@@ -1447,6 +1447,38 @@ class Database {
        function ping() {
                return mysql_ping( $this->mConn );
        }
+       
+       /**
+        * Report slave lag as shown by SHOW PROCESSLIST.
+        *
+        * Walks the process list and returns the Time column of the second row
+        * whose User is 'system user'. That row is assumed to be the slave SQL
+        * thread, which is a heuristic rather than a rigorous test.
+        * At the moment, this will only work if the DB user has the PROCESS privilege.
+        *
+        * @return mixed Time value of the matching row, or false when fewer than
+        *               two 'system user' rows are listed
+        */
+       function getLag() {
+               $processList = $this->query( 'SHOW PROCESSLIST' );
+               $systemThreadsSeen = 0;
+               while ( $thread = $this->fetchObject( $processList ) ) {
+                       if ( $thread->User != 'system user' ) {
+                               # Not a 'system user' row, keep looking
+                               continue;
+                       }
+                       $systemThreadsSeen++;
+                       if ( $systemThreadsSeen == 2 ) {
+                               # Second system thread: taken to be the slave SQL thread
+                               return $thread->Time;
+                       }
+               }
+               return false;
+       }
+
+       /**
+        * Run SHOW STATUS and collect the rows into an associative array.
+        *
+        * @return array Map of each row's Variable_name to its Value
+        */
+       function getStatus() {
+               $variables = array();
+               $result = $this->query( 'SHOW STATUS' );
+               for ( $entry = $this->fetchObject( $result ); $entry; $entry = $this->fetchObject( $result ) ) {
+                       $variables[$entry->Variable_name] = $entry->Value;
+               }
+               return $variables;
+       }
 } 
 
 /**
index af86f71..4764647 100644 (file)
@@ -216,8 +216,11 @@ function logProfilingData() {
  * @return bool
  */
 function wfReadOnly() {
-       global $wgReadOnlyFile;
+       global $wgReadOnlyFile, $wgReadOnly;
 
+       if ( $wgReadOnly ) {
+               return true;
+       }
        if ( '' == $wgReadOnlyFile ) {
                return false;
        }
index abbf936..c6f6f15 100644 (file)
@@ -91,6 +91,14 @@ class LoadBalancer {
                foreach ( $weights as $w ) {
                        $sum += $w;
                }
+
+               if ( $sum == 0 ) {
+                       # No loads on any of them
+                       # Just pick one at random
+                       foreach ( $weights as $i => $w ) {
+                               $weights[$i] = 1;
+                       }
+               }
                $max = mt_getrandmax();
                $rand = mt_rand(0, $max) / $max * $sum;
                
@@ -104,8 +112,44 @@ class LoadBalancer {
                return $i;
        }
 
+       /**
+        * Pick a random server index from $loads, skipping excessively lagged servers.
+        *
+        * A server is dropped from consideration when it has a 'max lag' entry in
+        * $this->mServers and its lag as reported by getLagTimes() exceeds that entry.
+        * If the loads of the remaining servers sum to zero, index 0 is dropped as
+        * well so that the master is not chosen as a reader.
+        *
+        * @param array $loads Server index => load
+        * @return mixed Result of pickRandom() on the remaining servers, or false
+        *               when no server remains
+        */
+       function getRandomNonLagged( $loads ) {
+               # Unset excessively lagged servers
+               $lags = $this->getLagTimes();
+               foreach ( $lags as $i => $lag ) {
+                       if ( isset( $this->mServers[$i]['max lag'] ) && $lag > $this->mServers[$i]['max lag'] ) {
+                               unset( $loads[$i] );
+                       }
+               }
+
+               # Find out if all the slaves with non-zero load are lagged
+               $sum = 0;
+               foreach ( $loads as $load ) {
+                       $sum += $load;
+               }
+               if ( $sum == 0 ) {
+                       # No appropriate DB servers except maybe the master and some slaves with zero load
+                       # Do NOT use the master
+                       # Instead, when nothing else is left, this function returns false so the
+                       # caller can switch to read-only mode and use a lagged slave.
+                       unset ( $loads[0] );
+               }
+
+               if ( count( $loads ) == 0 ) {
+                       return false;
+               }
+
+               # Log the surviving candidates; the newline keeps this entry from running
+               # into the next debug message
+               wfDebug( var_export( $loads, true ) . "\n" );
+
+               # Return a random representative of the remainder
+               return $this->pickRandom( $loads );
+       }
+
+
        function getReaderIndex()
        {
+               global $wgMaxLag, $wgReadOnly;
+
                $fname = 'LoadBalancer::getReaderIndex';
                wfProfileIn( $fname );
 
@@ -119,8 +163,19 @@ class LoadBalancer {
                                # $loads is $this->mLoads except with elements knocked out if they
                                # don't work
                                $loads = $this->mLoads;
+                               $done = false;
+                               $totalElapsed = 0;
                                do {
-                                       $i = $this->pickRandom( $loads );
+                                       if ( $wgReadOnly ) {
+                                               $i = $this->pickRandom( $loads );
+                                       } else {
+                                               $i = $this->getRandomNonLagged( $loads );
+                                               if ( $i === false && count( $loads ) != 0 )  {
+                                                       # All slaves lagged. Switch to read-only mode
+                                                       $wgReadOnly = wfMsgNoDB( 'readonly_lag' );
+                                                       $i = $this->pickRandom( $loads );
+                                               }
+                                       }
                                        if ( $i !== false ) {
                                                wfDebug( "Using reader #$i: {$this->mServers[$i]['host']}...\n" );
                                                $this->openConnection( $i );
@@ -128,16 +183,32 @@ class LoadBalancer {
                                                if ( !$this->isOpen( $i ) ) {
                                                        wfDebug( "Failed\n" );
                                                        unset( $loads[$i] );
-                                               } elseif ( isset( $this->mServers[$i]['slave pos'] ) ) {
-                                                       wfDebug( "Lagged slave\n" );
-                                                       $this->mLaggedSlaveMode = true;
+                                                       $sleepTime = 0;
                                                } else {
-                                                       wfDebug( "OK\n" );
+                                                       $status = $this->mConnections[$i]->getStatus();
+                                                       if ( isset( $this->mServers[$i]['max threads'] ) && 
+                                                         $status['Threads_running'] > $this->mServers[$i]['max threads'] ) 
+                                                       {
+                                                               # Slave is lagged, wait for a while
+                                                               $sleepTime = 5000 * $status['Threads_connected'];
+
+                                                               # If we reach the timeout and exit the loop, don't use it
+                                                               $i = false;
+                                                       } else {
+                                                               $done = true;
+                                                               $sleepTime = 0;
+                                                       }
                                                }
+                                       } else {
+                                               $sleepTime = 500000;
+                                       }
+                                       if ( $sleepTime ) {
+                                                       $totalElapsed += $sleepTime;
+                                                       usleep( $sleepTime );
                                        }
-                               } while ( $i !== false && !$this->isOpen( $i ) );
+                               } while ( count( $loads ) && !$done && $totalElapsed / 1e6 < $this->mWaitTimeout );
 
-                               if ( $this->isOpen( $i ) ) {
+                               if ( $i !== false && $this->isOpen( $i ) ) {
                                        $this->mReadIndex = $i;
                                } else {
                                        $i = false;
@@ -167,6 +238,7 @@ class LoadBalancer {
         * Otherwise sets a variable telling it to wait if such a connection is opened
         */
        function waitFor( $file, $pos ) {
+               /*
                $fname = 'LoadBalancer::waitFor';
                wfProfileIn( $fname );
 
@@ -187,12 +259,15 @@ class LoadBalancer {
                        } 
                }
                wfProfileOut( $fname );
+               */
        }
 
        /**
         * Wait for a given slave to catch up to the master pos stored in $this
         */
        function doWait( $index ) {
+               return true;
+               /*
                global $wgMemc;
                
                $retVal = false;
@@ -228,7 +303,7 @@ class LoadBalancer {
                                wfDebug( "Done\n" );
                        }
                }
-               return $retVal;
+               return $retVal;*/
        }               
 
        /**
@@ -459,6 +534,63 @@ class LoadBalancer {
                }
                return $success;
        }
+
+       /**
+        * Find the server reporting the largest lag.
+        * This is useful for maintenance scripts that need to throttle their updates.
+        *
+        * Only servers for which openConnection() succeeds are asked for their lag.
+        *
+        * @return array array( host, lag ) of the most-lagged server; the initial
+        *               values '' and -1 are returned when no lag exceeds -1
+        */
+       function getMaxLag() {
+               $worstHost = '';
+               $worstLag = -1;
+               foreach ( array_keys( $this->mServers ) as $index ) {
+                       if ( !$this->openConnection( $index ) ) {
+                               # Could not connect, so there is no lag to compare
+                               continue;
+                       }
+                       $currentLag = $this->mConnections[$index]->getLag();
+                       if ( $currentLag > $worstLag ) {
+                               $worstLag = $currentLag;
+                               $worstHost = $this->mServers[$index]['host'];
+                       }
+               }
+               return array( $worstHost, $worstLag );
+       }
+       
+       /**
+        * Get the lag time of each DB server that can be connected to.
+        * Results are cached for a short time in memcached under 'lag_times'.
+        *
+        * A cached copy is refreshed at random before it expires: the refresh is
+        * taken when mt_rand( 0, $chance ) yields 0, and $chance shrinks to 0 as the
+        * entry's age approaches $expiry, so a refresh becomes more likely with age.
+        *
+        * @return array Server index => value returned by that connection's getLag()
+        */
+       function getLagTimes() {
+               global $wgMemc;
+
+               # Cache lifetime in seconds, and the scaling factor for the recache odds
+               $expiry = 5;
+               $requestRate = 10;
+
+               $cached = $wgMemc->get( 'lag_times' );
+               if ( $cached ) {
+                       # Decide whether the cached copy is still good enough
+                       $age = time() - $cached['timestamp'];
+                       $upperBound = max( 0, ( $expiry - $age ) * $requestRate );
+                       $keepCached = ( mt_rand( 0, $upperBound ) != 0 );
+                       if ( $keepCached ) {
+                               unset( $cached['timestamp'] );
+                               return $cached;
+                       }
+               }
+
+               # Cache key missing, expired, or chosen for an early refresh
+               $fresh = array();
+               foreach ( array_keys( $this->mServers ) as $index ) {
+                       if ( $this->openConnection( $index ) ) {
+                               $fresh[$index] = $this->mConnections[$index]->getLag();
+                       }
+               }
+
+               # Store with a timestamp key so the age of the entry can be computed later
+               $fresh['timestamp'] = time();
+               $wgMemc->set( 'lag_times', $fresh, $expiry );
+
+               # The timestamp is bookkeeping only; callers get just the lag values
+               unset( $fresh['timestamp'] );
+               return $fresh;
+       }
 }
 
 ?>
index 879f5e8..624ab6c 100644 (file)
@@ -611,7 +611,7 @@ class OutputPage {
        }
 
        function readOnlyPage( $source = null, $protected = false ) {
-               global $wgUser, $wgReadOnlyFile;
+               global $wgUser, $wgReadOnlyFile, $wgReadOnly;
 
                $this->setRobotpolicy( 'noindex,nofollow' );
                $this->setArticleRelated( false );
@@ -621,7 +621,11 @@ class OutputPage {
                        $this->addWikiText( wfMsg( 'protectedtext' ) );
                } else {
                        $this->setPageTitle( wfMsg( 'readonly' ) );
-                       $reason = file_get_contents( $wgReadOnlyFile );
+                       if ( $wgReadOnly ) {
+                               $reason = $wgReadOnly;
+                       } else {
+                               $reason = file_get_contents( $wgReadOnlyFile );
+                       }
                        $this->addWikiText( wfMsg( 'readonlytext', $reason ) );
                }
 
index ce983a6..2186247 100644 (file)
@@ -520,6 +520,7 @@ page that has been deleted.
 
 <p>If this is not the case, you may have found a bug in the software.
 Please report this to an administrator, making note of the URL.",
+'readonly_lag' => "The database has been automatically locked while the slave database servers catch up to the master",
 'internalerror' => 'Internal error',
 'filecopyerror' => "Could not copy file \"$1\" to \"$2\".",
 'filerenameerror' => "Could not rename file \"$1\" to \"$2\".",