phpcs: More require/include is not a function
[lhc/web/wiklou.git] / maintenance / storage / trackBlobs.php
index 653a8c3..7857dd9 100644 (file)
@@ -1,23 +1,48 @@
 <?php
-
-require( dirname( __FILE__ ) .'/../commandLine.inc' );
+/**
+ * Adds blobs from a given external storage cluster to the blob_tracking table.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ * @see wfWaitForSlaves()
+ */
+
+require __DIR__ . '/../commandLine.inc';
 
 
 if ( count( $args ) < 1 ) {
        echo "Usage: php trackBlobs.php <cluster> [... <cluster>]\n";
        echo "Adds blobs from a given ES cluster to the blob_tracking table\n";
+       echo "Automatically deletes the tracking table and starts from the start again when restarted.\n";
+
        exit( 1 );
 }
 $tracker = new TrackBlobs( $args );
-$tracker->trackBlobs();
+$tracker->run();
+echo "All done.\n";
 
 class TrackBlobs {
-       var $clusters, $textClause;
-       var $doBlobOrphans;
-       var $trackedBlobs = array();
+       public $clusters, $textClause;
+       public $doBlobOrphans;
+       public $trackedBlobs = array();
 
-       var $batchSize = 1000;
-       var $reportingInterval = 10;
+       public $batchSize = 1000;
+       public $reportingInterval = 10;
 
        function __construct( $clusters ) {
                $this->clusters = $clusters;
@@ -31,7 +56,8 @@ class TrackBlobs {
                }
        }
 
-       function trackBlobs() {
+       function run() {
+               $this->checkIntegrity();
                $this->initTrackingTable();
                $this->trackRevisions();
                $this->trackOrphanText();
@@ -40,11 +66,54 @@ class TrackBlobs {
                }
        }
 
+       function checkIntegrity() {
+               echo "Doing integrity check...\n";
+               $dbr = wfGetDB( DB_SLAVE );
+
+               // Scan for HistoryBlobStub objects in the text table (bug 20757)
+
+               $exists = $dbr->selectField( 'text', 1,
+                       'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
+                       'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
+                       __METHOD__
+               );
+
+               if ( $exists ) {
+                       echo "Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
+                               "This script could destroy these objects if it continued. Run resolveStubs.php\n" .
+                               "to fix this.\n";
+                       exit( 1 );
+               }
+
+               // Scan the archive table for HistoryBlobStub objects or external flags (bug 22624)
+               $flags = $dbr->selectField( 'archive', 'ar_flags',
+                       'ar_flags LIKE \'%external%\' OR (' .
+                       'ar_flags LIKE \'%object%\' ' .
+                       'AND LOWER(CONVERT(LEFT(ar_text,22) USING latin1)) = \'o:15:"historyblobstub"\' )',
+                       __METHOD__
+               );
+
+               if ( strpos( $flags, 'external' ) !== false ) {
+                       echo "Integrity check failed: found external storage pointers in your archive table.\n" .
+                               "Run normaliseArchiveTable.php to fix this.\n";
+                       exit( 1 );
+               } elseif ( $flags ) {
+                       echo "Integrity check failed: found HistoryBlobStub objects in your archive table.\n" .
+                               "These objects are probably already broken, continuing would make them\n" .
+                               "unrecoverable. Run \"normaliseArchiveTable.php --fix-cgz-bug\" to fix this.\n";
+                       exit( 1 );
+               }
+
+               echo "Integrity check OK\n";
+       }
+
        function initTrackingTable() {
                $dbw = wfGetDB( DB_MASTER );
-               if ( !$dbw->tableExists( 'blob_tracking' ) ) {
-                       $dbw->sourceFile( dirname( __FILE__ ) . '/blob_tracking.sql' );
+               if ( $dbw->tableExists( 'blob_tracking' ) ) {
+                       $dbw->query( 'DROP TABLE ' . $dbw->tableName( 'blob_tracking' ) );
+                       $dbw->query( 'DROP TABLE ' . $dbw->tableName( 'blob_orphans' ) );
                }
+               $dbw->sourceFile( __DIR__ . '/blob_tracking.sql' );
        }
 
        function getTextClause() {
@@ -55,7 +124,7 @@ class TrackBlobs {
                                if ( $this->textClause != '' ) {
                                        $this->textClause .= ' OR ';
                                }
-                               $this->textClause .= 'old_text LIKE ' . $dbr->addQuotes( $dbr->escapeLike( "DB://$cluster/" ) . '%' );
+                               $this->textClause .= 'old_text' . $dbr->buildLike( "DB://$cluster/", $dbr->anyString() );
                        }
                }
                return $this->textClause;
@@ -68,7 +137,7 @@ class TrackBlobs {
                return array(
                        'cluster' => $m[1],
                        'id' => intval( $m[2] ),
-                       'hash' => isset( $m[3] ) ? $m[2] : null
+                       'hash' => isset( $m[3] ) ? $m[3] : null
                );
        }
 
@@ -94,7 +163,7 @@ class TrackBlobs {
                                        'rev_id > ' . $dbr->addQuotes( $startId ),
                                        'rev_text_id=old_id',
                                        $textClause,
-                                       "old_flags LIKE '%external%'",
+                                       'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
                                ),
                                __METHOD__,
                                array(
@@ -137,7 +206,7 @@ class TrackBlobs {
                        if ( $batchesDone >= $this->reportingInterval ) {
                                $batchesDone = 0;
                                echo "$startId / $endId\n";
-                               wfWaitForSlaves( 5 );
+                               wfWaitForSlaves();
                        }
                }
                echo "Found $rowsInserted revisions\n";
@@ -165,18 +234,18 @@ class TrackBlobs {
 
                # Scan the text table for orphan text
                while ( true ) {
-                       $res = $dbr->select( array( 'text', 'blob_tracking' ), 
+                       $res = $dbr->select( array( 'text', 'blob_tracking' ),
                                array( 'old_id', 'old_flags', 'old_text' ),
-                               array( 
+                               array(
                                        'old_id>' . $dbr->addQuotes( $startId ),
                                        $textClause,
-                                       "old_flags LIKE '%external%'",
+                                       'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
                                        'bt_text_id IS NULL'
                                ),
                                __METHOD__,
                                array(
                                        'ORDER BY' => 'old_id',
-                                       'LIMIT' => $this->batchSize 
+                                       'LIMIT' => $this->batchSize
                                ),
                                array( 'blob_tracking' => array( 'LEFT JOIN', 'bt_text_id=old_id' ) )
                        );
@@ -221,7 +290,7 @@ class TrackBlobs {
                        if ( $batchesDone >= $this->reportingInterval ) {
                                $batchesDone = 0;
                                echo "$startId / $endId\n";
-                               wfWaitForSlaves( 5 );
+                               wfWaitForSlaves();
                        }
                }
                echo "Found $rowsInserted orphan text rows\n";
@@ -240,25 +309,38 @@ class TrackBlobs {
                        return;
                }
 
-               # Wait until the blob_tracking table is available in the slave
                $dbw = wfGetDB( DB_MASTER );
-               $dbr = wfGetDB( DB_SLAVE );
-               $pos = $dbw->getMasterPos();
-               $dbr->masterPosWait( $pos, 100000 );
 
                foreach ( $this->clusters as $cluster ) {
                        echo "Searching for orphan blobs in $cluster...\n";
                        $lb = wfGetLBFactory()->getExternalLB( $cluster );
-                       $extDB = $lb->getConnection( DB_SLAVE );
+                       try {
+                               $extDB = $lb->getConnection( DB_SLAVE );
+                       } catch ( DBConnectionError $e ) {
+                               if ( strpos( $e->error, 'Unknown database' ) !== false ) {
+                                       echo "No database on $cluster\n";
+                               } else {
+                                       echo "Error on $cluster: " . $e->getMessage() . "\n";
+                               }
+                               continue;
+                       }
+                       $table = $extDB->getLBInfo( 'blobs table' );
+                       if ( is_null( $table ) ) {
+                               $table = 'blobs';
+                       }
+                       if ( !$extDB->tableExists( $table ) ) {
+                               echo "No blobs table on cluster $cluster\n";
+                               continue;
+                       }
                        $startId = 0;
                        $batchesDone = 0;
                        $actualBlobs = gmp_init( 0 );
-                       $endId = $extDB->selectField( 'blobs', 'MAX(blob_id)', false, __METHOD__ );
+                       $endId = $extDB->selectField( $table, 'MAX(blob_id)', false, __METHOD__ );
 
                        // Build a bitmap of actual blob rows
                        while ( true ) {
-                               $res = $extDB->select( 'blobs', 
-                                       array( 'blob_id' ), 
+                               $res = $extDB->select( $table,
+                                       array( 'blob_id' ),
                                        array( 'blob_id > ' . $extDB->addQuotes( $startId ) ),
                                        __METHOD__,
                                        array( 'LIMIT' => $this->batchSize, 'ORDER BY' => 'blob_id' )
@@ -283,10 +365,11 @@ class TrackBlobs {
                        // Find actual blobs that weren't tracked by the previous passes
                        // This is a set-theoretic difference A \ B, or in bitwise terms, A & ~B
                        $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
-                       
+
                        // Traverse the orphan list
                        $insertBatch = array();
                        $id = 0;
+                       $numOrphans = 0;
                        while ( true ) {
                                $id = gmp_scan1( $orphans, $id );
                                if ( $id == -1 ) {
@@ -296,12 +379,18 @@ class TrackBlobs {
                                        'bo_cluster' => $cluster,
                                        'bo_blob_id' => $id
                                );
+                               if ( count( $insertBatch ) > $this->batchSize ) {
+                                       $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+                                       $insertBatch = array();
+                               }
+
                                ++$id;
+                               ++$numOrphans;
                        }
-
-                       // Insert the batch
-                       echo "Found " . count( $insertBatch ) . " orphan(s) in $cluster\n";
-                       $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+                       if ( $insertBatch ) {
+                               $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+                       }
+                       echo "Found $numOrphans orphan(s) in $cluster\n";
                }
        }
 }