rdbms: fix active GTID filtering in DatabaseMysqlBase
authorAaron Schulz <aschulz@wikimedia.org>
Wed, 19 Jun 2019 08:10:15 +0000 (09:10 +0100)
committerMobrovac <mobrovac@wikimedia.org>
Wed, 25 Sep 2019 11:59:05 +0000 (11:59 +0000)
In masterPosWait(), only $pos will have the known active domain/server set
since it usually comes from getMasterPos(). However, the reference position,
from getReplicaDB(), does not have the active domains set since querying
gtid_domain_id on the replica would be incorrect and getting a connection
to the master could be expensive.

Remove obsolete hacks for jobs that used to store master positions.

Also, use the regular Database::query() method for stylistic consistency.

Bug: T224422
Change-Id: I41bbb9f337e46451aa17788dbd446db4a213a5a7

includes/libs/rdbms/database/DatabaseMysqlBase.php
includes/libs/rdbms/database/position/MySQLMasterPos.php
tests/phpunit/includes/libs/rdbms/database/DatabaseMysqlBaseTest.php

index 7be3b7d..464e68c 100644 (file)
@@ -850,27 +850,44 @@ abstract class DatabaseMysqlBase extends Database {
                }
 
                if ( $this->getLBInfo( 'is static' ) === true ) {
+                       $this->queryLogger->debug(
+                               "Bypassed replication wait; database has a static dataset",
+                               $this->getLogContext( [ 'method' => __METHOD__ ] )
+                       );
+
                        return 0; // this is a copy of a read-only dataset with no master DB
                } elseif ( $this->lastKnownReplicaPos && $this->lastKnownReplicaPos->hasReached( $pos ) ) {
+                       $this->queryLogger->debug(
+                               "Bypassed replication wait; replication already known to have reached $pos",
+                               $this->getLogContext( [ 'method' => __METHOD__ ] )
+                       );
+
                        return 0; // already reached this point for sure
                }
 
                // Call doQuery() directly, to avoid opening a transaction if DBO_TRX is set
                if ( $pos->getGTIDs() ) {
-                       // Ignore GTIDs from domains exclusive to the master DB (presumably inactive)
-                       $rpos = $this->getReplicaPos();
-                       $gtidsWait = $rpos ? MySQLMasterPos::getCommonDomainGTIDs( $pos, $rpos ) : [];
+                       // Get the GTIDs from this replica server too see the domains (channels)
+                       $refPos = $this->getReplicaPos();
+                       if ( !$refPos ) {
+                               $this->queryLogger->error(
+                                       "Could not get replication position",
+                                       $this->getLogContext( [ 'method' => __METHOD__ ] )
+                               );
+
+                               return -1; // this is the master itself?
+                       }
+                       // GTIDs with domains (channels) that are active and are present on the replica
+                       $gtidsWait = $pos::getRelevantActiveGTIDs( $pos, $refPos );
                        if ( !$gtidsWait ) {
                                $this->queryLogger->error(
-                                       "No GTIDs with the same domain between master ($pos) and replica ($rpos)",
-                                       $this->getLogContext( [
-                                               'method' => __METHOD__,
-                                       ] )
+                                       "No active GTIDs in $pos share a domain with those in $refPos",
+                                       $this->getLogContext( [ 'method' => __METHOD__, 'activeDomain' => $pos ] )
                                );
 
                                return -1; // $pos is from the wrong cluster?
                        }
-                       // Wait on the GTID set (MariaDB only)
+                       // Wait on the GTID set
                        $gtidArg = $this->addQuotes( implode( ',', $gtidsWait ) );
                        if ( strpos( $gtidArg, ':' ) !== false ) {
                                // MySQL GTIDs, e.g "source_id:transaction_id"
@@ -886,28 +903,28 @@ abstract class DatabaseMysqlBase extends Database {
                        $sql = "SELECT MASTER_POS_WAIT($encFile, $encPos, $timeout)";
                }
 
-               list( $res, $err ) = $this->executeQuery( $sql, __METHOD__, self::QUERY_IGNORE_DBO_TRX );
-               $row = $res ? $this->fetchRow( $res ) : false;
-               if ( !$row ) {
-                       throw new DBExpectedError( $this, "Replication wait failed: {$err}" );
-               }
+               $res = $this->query( $sql, __METHOD__, self::QUERY_IGNORE_DBO_TRX );
+               $row = $this->fetchRow( $res );
 
                // Result can be NULL (error), -1 (timeout), or 0+ per the MySQL manual
                $status = ( $row[0] !== null ) ? intval( $row[0] ) : null;
                if ( $status === null ) {
-                       if ( !$pos->getGTIDs() ) {
-                               // T126436: jobs programmed to wait on master positions might be referencing
-                               // binlogs with an old master hostname; this makes MASTER_POS_WAIT() return null.
-                               // Try to detect this case and treat the replica DB as having reached the given
-                               // position (any master switchover already requires that the new master be caught
-                               // up before the switch).
-                               $replicationPos = $this->getReplicaPos();
-                               if ( $replicationPos && !$replicationPos->channelsMatch( $pos ) ) {
-                                       $this->lastKnownReplicaPos = $replicationPos;
-                                       $status = 0;
-                               }
-                       }
+                       $this->queryLogger->error(
+                               "An error occurred while waiting for replication to reach $pos",
+                               $this->getLogContext( [ 'method' => __METHOD__, 'sql' => $sql ] )
+                       );
+               } elseif ( $status < 0 ) {
+                       $this->queryLogger->error(
+                               "Timed out waiting for replication to reach $pos",
+                               $this->getLogContext( [
+                                       'method' => __METHOD__, 'sql' => $sql, 'timeout' => $timeout
+                               ] )
+                       );
                } elseif ( $status >= 0 ) {
+                       $this->queryLogger->debug(
+                               "Replication has reached $pos",
+                               $this->getLogContext( [ 'method' => __METHOD__ ] )
+                       );
                        // Remember that this position was reached to save queries next time
                        $this->lastKnownReplicaPos = $pos;
                }
index fa2c1db..e1b8f9a 100644 (file)
@@ -190,39 +190,69 @@ class MySQLMasterPos implements DBMasterPos {
        }
 
        /**
+        * Set the GTID domain known to be used in new commits on a replication stream of interest
+        *
+        * This makes getRelevantActiveGTIDs() filter out GTIDs from other domains
+        *
+        * @see MySQLMasterPos::getRelevantActiveGTIDs()
+        * @see https://mariadb.com/kb/en/library/gtid/#gtid_domain_id
+        *
         * @param int|null $id @@gtid_domain_id of the active replication stream
+        * @return MySQLMasterPos This instance (since 1.34)
         * @since 1.31
         */
        public function setActiveDomain( $id ) {
                $this->activeDomain = (int)$id;
+
+               return $this;
        }
 
        /**
+        * Set the server ID known to be used in new commits on a replication stream of interest
+        *
+        * This makes getRelevantActiveGTIDs() filter out GTIDs from other origin servers
+        *
+        * @see MySQLMasterPos::getRelevantActiveGTIDs()
+        *
         * @param int|null $id @@server_id of the server were writes originate
+        * @return MySQLMasterPos This instance (since 1.34)
         * @since 1.31
         */
        public function setActiveOriginServerId( $id ) {
                $this->activeServerId = (int)$id;
+
+               return $this;
        }
 
        /**
+        * Set the server UUID known to be used in new commits on a replication stream of interest
+        *
+        * This makes getRelevantActiveGTIDs() filter out GTIDs from other origin servers
+        *
+        * @see MySQLMasterPos::getRelevantActiveGTIDs()
+        *
         * @param string|null $id @@server_uuid of the server were writes originate
+        * @return MySQLMasterPos This instance (since 1.34)
         * @since 1.31
         */
        public function setActiveOriginServerUUID( $id ) {
                $this->activeServerUUID = $id;
+
+               return $this;
        }
 
        /**
         * @param MySQLMasterPos $pos
         * @param MySQLMasterPos $refPos
-        * @return string[] List of GTIDs from $pos that have domains in $refPos
-        * @since 1.31
+        * @return string[] List of active GTIDs from $pos that have domains in $refPos
+        * @since 1.34
         */
-       public static function getCommonDomainGTIDs( MySQLMasterPos $pos, MySQLMasterPos $refPos ) {
-               return array_values(
-                       array_intersect_key( $pos->gtids, $refPos->getActiveGtidCoordinates() )
-               );
+       public static function getRelevantActiveGTIDs( MySQLMasterPos $pos, MySQLMasterPos $refPos ) {
+               return array_values( array_intersect_key(
+                       $pos->gtids,
+                       $pos->getActiveGtidCoordinates(),
+                       $refPos->gtids
+               ) );
        }
 
        /**
index 4c92545..6fff68f 100644 (file)
@@ -316,8 +316,8 @@ class DatabaseMysqlBaseTest extends PHPUnit\Framework\TestCase {
         * @dataProvider provideCommonDomainGTIDs
         * @covers Wikimedia\Rdbms\MySQLMasterPos
         */
-       public function testCommonGtidDomains( MySQLMasterPos $pos, MySQLMasterPos $ref, $gtids ) {
-               $this->assertEquals( $gtids, MySQLMasterPos::getCommonDomainGTIDs( $pos, $ref ) );
+       public function testGetRelevantActiveGTIDs( MySQLMasterPos $pos, MySQLMasterPos $ref, $gtids ) {
+               $this->assertEquals( $gtids, MySQLMasterPos::getRelevantActiveGTIDs( $pos, $ref ) );
        }
 
        public static function provideCommonDomainGTIDs() {
@@ -327,6 +327,12 @@ class DatabaseMysqlBaseTest extends PHPUnit\Framework\TestCase {
                                new MySQLMasterPos( '255-11-1000', 1 ),
                                [ '255-13-99' ]
                        ],
+                       [
+                               ( new MySQLMasterPos( '255-13-99,256-12-50,257-14-50', 1 ) )
+                                       ->setActiveDomain( 257 ),
+                               new MySQLMasterPos( '255-11-1000,257-14-30', 1 ),
+                               [ '257-14-50' ]
+                       ],
                        [
                                new MySQLMasterPos(
                                        '2E11FA47-71CA-11E1-9E33-C80AA9429562:1-5,' .