define( 'DB_READ', -1 );
define( 'DB_WRITE', -2 );
+
+# Scale polling time so that under overload conditions, the database server receives a
+# SHOW STATUS query at an average interval of this many microseconds; getReaderIndex() sleeps AVG_STATUS_POLL * Threads_connected.
+define( 'AVG_STATUS_POLL', 2000 );
+
+
/**
* Database load balancing object
*
*/
class LoadBalancer {
/* private */ var $mServers, $mConnections, $mLoads, $mGroupLoads;
- /* private */ var $mFailFunction;
+ /* private */ var $mFailFunction, $mErrorConnection;
/* private */ var $mForce, $mReadIndex, $mLastIndex;
/* private */ var $mWaitForFile, $mWaitForPos, $mWaitTimeout;
/* private */ var $mLaggedSlaveMode;
$this->mReadIndex = -1;
$this->mForce = -1;
$this->mLastIndex = -1;
+ $this->mErrorConnection = false;
}
+ /**
+ * Factory: create a new LoadBalancer and initialise it.
+ * $servers, $failFunction and $waitTimeout are forwarded unchanged to initialise().
+ * Returns the initialised LoadBalancer object.
+ */
function newFromParams( $servers, $failFunction = false, $waitTimeout = 10 )
{
$lb = new LoadBalancer;
- $lb->initialise( $servers, $failFunction = false );
+ $lb->initialise( $servers, $failFunction, $waitTimeout );
return $lb;
}
$this->mGroupLoads[$group][$i] = $ratio;
}
}
- }
+ }
}
-
+
/**
* Given an array of non-normalised probabilities, this function will select
* an element and return the appropriate key
foreach ( $weights as $w ) {
$sum += $w;
}
+
+ if ( $sum == 0 ) {
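+ # No loads on any of them: give every server the same weight so that
+ # one is picked at random. NOTE(review): $sum is still 0 here, so $rand below is always 0 -- confirm the pick is really random.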
+ foreach ( $weights as $i => $w ) {
+ $weights[$i] = 1;
+ }
+ }
$max = mt_getrandmax();
$rand = mt_rand(0, $max) / $max * $sum;
-
+
$sum = 0;
foreach ( $weights as $i => $w ) {
$sum += $w;
return $i;
}
+	/**
+	 * Choose a random server from $loads, leaving out servers whose lag
+	 * exceeds their own configured 'max lag'.
+	 *
+	 * @param array $loads Load weights keyed by server index
+	 * @return mixed Key selected by pickRandom(), or false when no candidate remains
+	 */
+	function getRandomNonLagged( $loads ) {
+		# Knock out every server that reports more lag than its limit allows.
+		# Servers without a 'max lag' setting are never removed here.
+		foreach ( $this->getLagTimes() as $serverIndex => $lagTime ) {
+			if ( !isset( $this->mServers[$serverIndex]['max lag'] ) ) {
+				continue;
+			}
+			if ( $lagTime > $this->mServers[$serverIndex]['max lag'] ) {
+				unset( $loads[$serverIndex] );
+			}
+		}
+
+		# Add up the weights of the servers that survived the lag check
+		$totalLoad = 0;
+		foreach ( $loads as $weight ) {
+			$totalLoad += $weight;
+		}
+
+		if ( $totalLoad == 0 ) {
+			# Only zero-load servers are left, possibly including the master.
+			# The master (index 0) must NOT be used for this: removing it lets
+			# the function return false, which makes the caller switch to
+			# read-only mode and fall back to a lagged slave.
+			unset( $loads[0] );
+		}
+
+		# Nothing usable left => false, otherwise a weighted random pick
+		return count( $loads ) == 0 ? false : $this->pickRandom( $loads );
+	}
+
+ /**
+ * Get the index of the reader connection, which may be a slave
+ * This takes into account load ratios and lag times. It should
+ * always return a consistent index during a given invocation
+ *
+ * Side effects: opens connections to databases, may set $wgReadOnly when
+ * no non-lagged server can be picked, and may usleep() between attempts
+ * while a server is over its 'max threads' limit or no server was picked.
+ *
+ * Returns the chosen server index, or false if no usable server was found
+ * before the candidate list ran out or $wgDBClusterTimeout was reached.
+ */
function getReaderIndex()
{
+ global $wgMaxLag, $wgReadOnly, $wgDBClusterTimeout;
+
$fname = 'LoadBalancer::getReaderIndex';
wfProfileIn( $fname );
# $loads is $this->mLoads except with elements knocked out if they
# don't work
$loads = $this->mLoads;
+ $done = false;
+ $totalElapsed = 0;
do {
- $i = $this->pickRandom( $loads );
+ if ( $wgReadOnly ) {
+ $i = $this->pickRandom( $loads );
+ } else {
+ $i = $this->getRandomNonLagged( $loads );
+ if ( $i === false && count( $loads ) != 0 ) {
+ # All slaves lagged. Switch to read-only mode
+ $wgReadOnly = wfMsgNoDB( 'readonly_lag' );
+ $i = $this->pickRandom( $loads );
+ }
+ }
+ $serverIndex = $i;
if ( $i !== false ) {
- wfDebug( "Using reader #$i: {$this->mServers[$i]['host']}...\n" );
+ wfDebugLog( 'connect', "Using reader #$i: {$this->mServers[$i]['host']}...\n" );
$this->openConnection( $i );
-
+
if ( !$this->isOpen( $i ) ) {
wfDebug( "Failed\n" );
unset( $loads[$i] );
- } elseif ( isset( $this->mServers[$i]['slave pos'] ) ) {
- wfDebug( "Lagged slave\n" );
- $this->mLaggedSlaveMode = true;
+ $sleepTime = 0;
} else {
- wfDebug( "OK\n" );
+ $status = $this->mConnections[$i]->getStatus();
+ if ( isset( $this->mServers[$i]['max threads'] ) &&
+ $status['Threads_running'] > $this->mServers[$i]['max threads'] )
+ {
+ # Too many running threads on this slave (over its 'max threads' limit), wait for a while
+ $sleepTime = AVG_STATUS_POLL * $status['Threads_connected'];
+
+ # If we reach the timeout and exit the loop, don't use it
+ $i = false;
+ } else {
+ $done = true;
+ $sleepTime = 0;
+ }
}
+ } else {
+ $sleepTime = 500000; # no server was picked: pause 500000 microseconds (usleep below)
+ }
+ if ( $sleepTime ) {
+ $totalElapsed += $sleepTime;
+ $x = "{$this->mServers[$serverIndex]['host']} [$serverIndex]"; # NOTE(review): $serverIndex is false when no server was picked -- confirm this label is intended
+ wfProfileIn( "$fname-sleep $x" );
+ usleep( $sleepTime );
+ wfProfileOut( "$fname-sleep $x" );
}
- } while ( $i !== false && !$this->isOpen( $i ) );
+ } while ( count( $loads ) && !$done && $totalElapsed / 1e6 < $wgDBClusterTimeout ); # $totalElapsed sums the requested sleeps in microseconds; /1e6 gives seconds
- if ( $this->isOpen( $i ) ) {
- $this->mReadIndex = $i;
+ if ( $i !== false && $this->isOpen( $i ) ) {
+ # Wait for the session master pos for a short time
+ if ( $this->mWaitForFile ) {
+ if ( !$this->doWait( $i ) ) {
+ $this->mServers[$i]['slave pos'] = $this->mConnections[$i]->getSlavePos();
+ }
+ }
+ if ( $i !== false ) {
+ $this->mReadIndex = $i;
+ }
} else {
$i = false;
}
wfProfileOut( $fname );
return $i;
}
-
+
/**
* Get a random server to use in a query group
*/
wfDebug( "Query group $group => $i\n" );
return $i;
}
-
+
/**
* Set the master wait position
* If a DB_SLAVE connection has been opened already, waits
$this->mServers[$i]['slave pos'] = $this->mConnections[$i]->getSlavePos();
$this->mLaggedSlaveMode = true;
}
- }
+ }
}
wfProfileOut( $fname );
}
*/
function doWait( $index ) {
global $wgMemc;
-
+
$retVal = false;
# Debugging hacks
}
}
return $retVal;
- }
+ }
/**
* Get a connection by index
{
$fname = 'LoadBalancer::getConnection';
wfProfileIn( $fname );
-
+
# Query groups
$groupIndex = false;
foreach ( $groups as $group ) {
break;
}
}
-
+
# Operation-based index
- if ( $i == DB_SLAVE ) {
+ if ( $i == DB_SLAVE ) {
$i = $this->getReaderIndex();
} elseif ( $i == DB_MASTER ) {
$i = $this->getWriterIndex();
$i = $this->getWriterIndex();
}
}
+ # Couldn't find a working server in getReaderIndex()?
+ if ( $i === false ) {
+ $this->reportConnectionError( $this->mErrorConnection );
+ }
# Now we have an explicit index into the servers array
$this->openConnection( $i, $fail );
-
+
wfProfileOut( $fname );
return $this->mConnections[$i];
}
if ( !$this->isOpen( $i ) ) {
$this->mConnections[$i] = $this->reallyOpenConnection( $this->mServers[$i] );
-
- if ( $this->isOpen( $i ) && $i != 0 && $this->mWaitForFile ) {
- if ( !$this->doWait( $i ) ) {
- $this->mServers[$i]['slave pos'] = $this->mConnections[$i]->getSlavePos();
- $success = false;
- }
- }
}
+
if ( !$this->isOpen( $i ) ) {
wfDebug( "Failed to connect to database $i at {$this->mServers[$i]['host']}\n" );
if ( $fail ) {
$this->reportConnectionError( $this->mConnections[$i] );
}
+ $this->mErrorConnection = $this->mConnections[$i];
$this->mConnections[$i] = false;
$success = false;
}
if( !is_integer( $index ) ) {
return false;
}
- if ( array_key_exists( $index, $this->mConnections ) && is_object( $this->mConnections[$index] ) &&
- $this->mConnections[$index]->isOpen() )
+ if ( array_key_exists( $index, $this->mConnections ) && is_object( $this->mConnections[$index] ) &&
+ $this->mConnections[$index]->isOpen() )
{
return true;
} else {
return false;
}
}
-
+
/**
* Really opens a connection
* @private
if( !is_array( $server ) ) {
wfDebugDieBacktrace( 'You must update your load-balancing configuration. See DefaultSettings.php entry for $wgDBservers.' );
}
-
+
extract( $server );
# Get class for this database type
$class = 'Database' . ucfirst( $type );
# Create object
return new $class( $host, $user, $password, $dbname, 1, $flags );
}
-
+
+	/**
+	 * Report a database connection failure through the failed connection object.
+	 *
+	 * The connection is told which fail function to use ($this->mFailFunction,
+	 * or false when none is configured) and is then asked to report the error
+	 * itself. A static flag prevents re-entering this method while a report
+	 * is already in progress.
+	 *
+	 * @param mixed $conn The connection that failed. getConnection() passes
+	 *        $this->mErrorConnection here, which is still false when no
+	 *        connection attempt has actually failed (for example when every
+	 *        candidate server was skipped for being too busy).
+	 */
 	function reportConnectionError( &$conn )
 	{
 		$fname = 'LoadBalancer::reportConnectionError';
 		wfProfileIn( $fname );
 		# Prevent infinite recursion
-
+
 		static $reporting = false;
 		if ( !$reporting ) {
 			$reporting = true;
+			if ( !is_object( $conn ) ) {
+				# No failed connection object is available to delegate to, so
+				# calling $conn->failFunction() would be a fatal call on a
+				# non-object. Abort with an explicit message instead.
+				# NOTE(review): $this->mFailFunction cannot be honoured on this
+				# path because it is only wired up through a connection object.
+				$reporting = false;
+				wfProfileOut( $fname );
+				wfDebugDieBacktrace( 'No working database server could be found, and no failed connection is available to report the error.' );
+			}
 			if ( $this->mFailFunction ) {
 				$conn->failFunction( $this->mFailFunction );
 			} else {
-				$conn->failFunction( 'wfEmergencyAbort' );
+				$conn->failFunction( false );
 			}
 			$conn->reportConnectionError();
 			$reporting = false;
 		}
 		wfProfileOut( $fname );
 	}
-
+
function getWriterIndex()
{
return 0;
function saveMasterPos() {
global $wgSessionStarted;
if ( $wgSessionStarted && count( $this->mServers ) > 1 ) {
- # If this entire request was served from a slave without opening a connection to the
+ # If this entire request was served from a slave without opening a connection to the
# master (however unlikely that may be), then we can fetch the position from the slave.
if ( empty( $this->mConnections[0] ) ) {
$conn =& $this->getConnection( DB_SLAVE );
function getLaggedSlaveMode() {
+ # Plain accessor: hands back the current value of $this->mLaggedSlaveMode unchanged
return $this->mLaggedSlaveMode;
}
+
+	/**
+	 * Ping every connection that is currently open.
+	 *
+	 * @return bool false if at least one open connection failed its ping(),
+	 *         true otherwise (including when no connection is open)
+	 */
+	function pingAll() {
+		$allAlive = true;
+		foreach ( $this->mConnections as $index => $unused ) {
+			# Connections that are not open are skipped; every open one is
+			# pinged even after an earlier failure.
+			if ( $this->isOpen( $index ) && !$this->mConnections[$index]->ping() ) {
+				$allAlive = false;
+			}
+		}
+		return $allAlive;
+	}
+
+	/**
+	 * Find the configured server that currently reports the largest lag.
+	 * Intended for maintenance scripts that need to throttle their updates.
+	 *
+	 * Every entry in $this->mServers is tried; servers for which
+	 * openConnection() does not succeed are skipped.
+	 *
+	 * @return array ( host, lag ) of the most-lagged server; ( '', -1 ) if no
+	 *         server reported a lag greater than -1
+	 */
+	function getMaxLag() {
+		$worstLag = -1;
+		$worstHost = '';
+		foreach ( $this->mServers as $index => $unused ) {
+			if ( !$this->openConnection( $index ) ) {
+				continue;
+			}
+			$currentLag = $this->mConnections[$index]->getLag();
+			if ( $currentLag > $worstLag ) {
+				$worstLag = $currentLag;
+				$worstHost = $this->mServers[$index]['host'];
+			}
+		}
+		return array( $worstHost, $worstLag );
+	}
+
+	/**
+	 * Get the lag time of each database server, keyed by server index.
+	 *
+	 * The result is cached in $wgMemc under the key 'lag_times' together
+	 * with the time it was stored. A cached copy is reused with probability
+	 * chance / ( chance + 1 ), where chance shrinks as the entry ages, so
+	 * the cache is refreshed by a random request before it expires.
+	 *
+	 * @return array Lag per server index (servers that could not be opened
+	 *         are absent); the internal 'timestamp' key is never returned
+	 */
+	function getLagTimes() {
+		global $wgMemc;
+
+		$cacheTtl = 5;      # lifetime of the cache entry, compared against time() differences
+		$requestRate = 10;  # scaling factor for the refresh odds
+
+		$cached = $wgMemc->get( 'lag_times' );
+		if ( $cached ) {
+			# The older the entry, the smaller $odds and the likelier a refresh
+			$age = time() - $cached['timestamp'];
+			$odds = max( 0, ( $cacheTtl - $age ) * $requestRate );
+			if ( mt_rand( 0, $odds ) != 0 ) {
+				unset( $cached['timestamp'] );
+				return $cached;
+			}
+		}
+
+		# Cache miss, expired entry, or this request was chosen to refresh it
+		$lagByServer = array();
+		foreach ( $this->mServers as $index => $unused ) {
+			if ( $this->openConnection( $index ) ) {
+				$lagByServer[$index] = $this->mConnections[$index]->getLag();
+			}
+		}
+
+		# Store with a timestamp so later readers can tell how old the data is,
+		# then strip the timestamp again before handing the array to the caller
+		$lagByServer['timestamp'] = time();
+		$wgMemc->set( 'lag_times', $lagByServer, $cacheTtl );
+		unset( $lagByServer['timestamp'] );
+
+		return $lagByServer;
+	}
}
+
+?>