Add maintenance to populate change_tag_def table and ct_tag_id field
author Amir Sarabadani <ladsgroup@gmail.com>
Tue, 5 Jun 2018 22:59:11 +0000 (00:59 +0200)
committer Amir Sarabadani <ladsgroup@gmail.com>
Fri, 15 Jun 2018 15:57:27 +0000 (17:57 +0200)
Bug: T193871
Change-Id: I5e863ffcfad5f2b66fb8d50666494acae3480d1a

autoload.php
maintenance/populateChangeTagDef.php [new file with mode: 0644]
tests/phpunit/maintenance/populateChangeTagDefTest.php [new file with mode: 0644]

index 67cd3b9..76b41d8 100644 (file)
@@ -1100,6 +1100,7 @@ $wgAutoloadLocalClasses = [
        'PopulateArchiveRevId' => __DIR__ . '/maintenance/populateArchiveRevId.php',
        'PopulateBacklinkNamespace' => __DIR__ . '/maintenance/populateBacklinkNamespace.php',
        'PopulateCategory' => __DIR__ . '/maintenance/populateCategory.php',
+       'PopulateChangeTagDef' => __DIR__ . '/maintenance/populateChangeTagDef.php',
        'PopulateContentModel' => __DIR__ . '/maintenance/populateContentModel.php',
        'PopulateExternallinksIndex60' => __DIR__ . '/maintenance/populateExternallinksIndex60.php',
        'PopulateFilearchiveSha1' => __DIR__ . '/maintenance/populateFilearchiveSha1.php',
diff --git a/maintenance/populateChangeTagDef.php b/maintenance/populateChangeTagDef.php
new file mode 100644 (file)
index 0000000..c815d8d
--- /dev/null
@@ -0,0 +1,192 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Populate change_tag_def and keep its usage counts accurate.
+ *
+ * When $wgChangeTagsSchemaMigrationStage is below MIGRATION_NEW, the script
+ * counts change_tag rows per ct_tag, upserts those counts into change_tag_def
+ * and then fills change_tag.ct_tag_id for rows where it is still NULL.
+ * Otherwise it only recounts change_tag rows per ct_tag_id and writes the
+ * result to change_tag_def.ctd_count.
+ *
+ * @ingroup Maintenance
+ */
+class PopulateChangeTagDef extends Maintenance {
+       /** @var Wikimedia\Rdbms\LBFactory */
+       protected $lbFactory;
+
+       public function __construct() {
+               parent::__construct();
+               $this->addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
+               $this->addOption( 'dry-run', 'Print debug info instead of actually updating the database' );
+               $this->setBatchSize( 1000 );
+               $this->addOption(
+                       'sleep',
+                       'Sleep time (in seconds) between every batch',
+                       false,
+                       true
+               );
+               $this->lbFactory = MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+       }
+
+       /**
+        * Entry point: pick the code path matching the schema migration stage.
+        */
+       public function execute() {
+               global $wgChangeTagsSchemaMigrationStage;
+               $this->setBatchSize( $this->getOption( 'batch-size', $this->getBatchSize() ) );
+
+               $this->countDown( 5 );
+               if ( $wgChangeTagsSchemaMigrationStage < MIGRATION_NEW ) {
+                       // ct_tag is still the reference: build change_tag_def from it, then
+                       // point the existing change_tag rows at the definitions just written.
+                       $this->updateCountTag();
+                       $this->backpopulateChangeTagId();
+               } else {
+                       $this->updateCountTagId();
+               }
+
+               // TODO: Implement
+               // $this->cleanZeroCountRows();
+       }
+
+       /**
+        * Recount change_tag rows per ct_tag_id and store each count in the
+        * change_tag_def row with the matching ctd_id.
+        *
+        * With --dry-run the would-be updates are printed instead of written.
+        */
+       private function updateCountTagId() {
+               $dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
+
+               // This query can be pretty expensive, don't run it on master
+               $res = $dbr->select(
+                       'change_tag',
+                       [ 'ct_tag_id', 'hitcount' => 'count(*)' ],
+                       [],
+                       __METHOD__,
+                       [ 'GROUP BY' => 'ct_tag_id' ]
+               );
+
+               $dbw = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
+               $dryRun = $this->hasOption( 'dry-run' );
+
+               foreach ( $res as $row ) {
+                       // Rows without a tag id (NULL/0) have no definition to update
+                       if ( !$row->ct_tag_id ) {
+                               continue;
+                       }
+
+                       if ( $dryRun ) {
+                               // $row is an object, so it cannot be handed to implode();
+                               // print the two selected fields explicitly.
+                               $this->output(
+                                       'This row will be updated: ' . $row->ct_tag_id . ', ' . $row->hitcount . "\n"
+                               );
+                               continue;
+                       }
+
+                       $dbw->update(
+                               'change_tag_def',
+                               [ 'ctd_count' => $row->hitcount ],
+                               [ 'ctd_id' => $row->ct_tag_id ],
+                               __METHOD__
+                       );
+               }
+               $this->lbFactory->waitForReplication();
+       }
+
+       /**
+        * Count change_tag rows per ct_tag and upsert the counts into
+        * change_tag_def, keyed on ctd_name.
+        *
+        * New definitions are inserted with ctd_user_defined = 0; existing ones
+        * only get their ctd_count refreshed. With --dry-run the would-be changes
+        * are printed instead of written.
+        */
+       private function updateCountTag() {
+               $dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
+
+               // This query can be pretty expensive, don't run it on master
+               $res = $dbr->select(
+                       'change_tag',
+                       [ 'ct_tag', 'hitcount' => 'count(*)' ],
+                       [],
+                       __METHOD__,
+                       [ 'GROUP BY' => 'ct_tag' ]
+               );
+
+               $dbw = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
+               $dryRun = $this->hasOption( 'dry-run' );
+
+               foreach ( $res as $row ) {
+                       // Hygiene check: skip NULL/empty names. Compared as a string so
+                       // that a tag literally named "0" is not discarded as falsy.
+                       if ( (string)$row->ct_tag === '' ) {
+                               continue;
+                       }
+
+                       if ( $dryRun ) {
+                               $this->output(
+                                       'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
+                               );
+                               continue;
+                       }
+
+                       $dbw->upsert(
+                               'change_tag_def',
+                               [
+                                       'ctd_name' => $row->ct_tag,
+                                       'ctd_user_defined' => 0,
+                                       'ctd_count' => $row->hitcount
+                               ],
+                               [ 'ctd_name' ],
+                               [ 'ctd_count' => $row->hitcount ],
+                               __METHOD__
+                       );
+               }
+               $this->lbFactory->waitForReplication();
+       }
+
+       /**
+        * Fill change_tag.ct_tag_id for every tag listed in change_tag_def.
+        */
+       private function backpopulateChangeTagId() {
+               $dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
+               $changeTagDefs = $dbr->select(
+                       'change_tag_def',
+                       [ 'ctd_name', 'ctd_id' ],
+                       [],
+                       __METHOD__
+               );
+
+               foreach ( $changeTagDefs as $row ) {
+                       $this->backpopulateChangeTagPerTag( $row->ctd_name, $row->ctd_id );
+               }
+       }
+
+       /**
+        * Set ct_tag_id on all change_tag rows of one tag that still have it NULL.
+        *
+        * Rows are processed in ascending ct_id order, one batch at a time. After
+        * each written batch the script waits for replication and then sleeps for
+        * the number of seconds given by --sleep (default 10). With --dry-run the
+        * affected ids are printed and nothing is written.
+        *
+        * @param string $tagName Value of change_tag.ct_tag to match
+        * @param int $tagId change_tag_def.ctd_id to store in ct_tag_id
+        */
+       private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
+               $dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
+               $dbw = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
+               $sleep = (int)$this->getOption( 'sleep', 10 );
+               $dryRun = $this->hasOption( 'dry-run' );
+               $lastId = 0;
+               while ( true ) {
+                       // Given that indexes might not be there, it's better to use replica.
+                       // ORDER BY is required: the loop resumes from the last id of the
+                       // batch, which is only the highest one if the batch is sorted.
+                       $ids = $dbr->selectFieldValues(
+                               'change_tag',
+                               'ct_id',
+                               [ 'ct_tag' => $tagName, 'ct_tag_id' => null, 'ct_id > ' . (int)$lastId ],
+                               __METHOD__,
+                               [ 'LIMIT' => $this->getBatchSize(), 'ORDER BY' => 'ct_id' ]
+                       );
+
+                       if ( !$ids ) {
+                               break;
+                       }
+                       $lastId = end( $ids );
+
+                       if ( $dryRun ) {
+                               $this->output(
+                                       "These ids will be changed to have \"{$tagId}\" as tag id: " . implode( ', ', $ids ) . "\n"
+                               );
+                               continue;
+                       }
+
+                       $dbw->update(
+                               'change_tag',
+                               [ 'ct_tag_id' => $tagId ],
+                               [ 'ct_id' => $ids ],
+                               __METHOD__
+                       );
+
+                       $this->lbFactory->waitForReplication();
+                       if ( $sleep > 0 ) {
+                               sleep( $sleep );
+                       }
+               }
+       }
+
+}
+
+// Name the class to run, then include the file referenced by the
+// RUN_MAINTENANCE_IF_MAIN constant (defined outside this file) — presumably it
+// executes $maintClass when this script is the entry point; TODO confirm.
+$maintClass = PopulateChangeTagDef::class;
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/tests/phpunit/maintenance/populateChangeTagDefTest.php b/tests/phpunit/maintenance/populateChangeTagDefTest.php
new file mode 100644 (file)
index 0000000..719b46b
--- /dev/null
@@ -0,0 +1,286 @@
+<?php
+
+namespace MediaWiki\Tests\Maintenance;
+
+use PopulateChangeTagDef;
+
+/**
+ * Tests for the populateChangeTagDef maintenance script.
+ *
+ * Every test starts from six change_tag rows (1x "One Tag", 2x "Two Tags",
+ * 3x "Three Tags") and an empty change_tag_def table.
+ *
+ * @group Database
+ * @covers PopulateChangeTagDef
+ */
+class PopulateChangeTagDefTest extends MaintenanceBaseTestCase {
+
+       public function getMaintenanceClass() {
+               return PopulateChangeTagDef::class;
+       }
+
+       public function setUp() {
+               parent::setUp();
+               $this->tablesUsed = [ 'change_tag', 'change_tag_def' ];
+
+               $this->cleanChangeTagTables();
+               $this->insertChangeTagData();
+       }
+
+       /**
+        * Remove every row from both tables touched by the script.
+        */
+       private function cleanChangeTagTables() {
+               wfGetDB( DB_MASTER )->delete( 'change_tag', '*' );
+               wfGetDB( DB_MASTER )->delete( 'change_tag_def', '*' );
+       }
+
+       /**
+        * Insert the change_tag fixture: the number of rows per tag equals the
+        * number in the tag's name.
+        */
+       private function insertChangeTagData() {
+               $changeTags = [];
+
+               $changeTags[] = [
+                       'ct_rc_id' => 1234,
+                       'ct_tag' => 'One Tag',
+               ];
+
+               $changeTags[] = [
+                       'ct_rc_id' => 1235,
+                       'ct_tag' => 'Two Tags',
+               ];
+
+               $changeTags[] = [
+                       'ct_log_id' => 1236,
+                       'ct_tag' => 'Two Tags',
+               ];
+
+               $changeTags[] = [
+                       'ct_rev_id' => 1237,
+                       'ct_tag' => 'Three Tags',
+               ];
+
+               $changeTags[] = [
+                       'ct_rc_id' => 1238,
+                       'ct_tag' => 'Three Tags',
+               ];
+
+               $changeTags[] = [
+                       'ct_log_id' => 1239,
+                       'ct_tag' => 'Three Tags',
+               ];
+
+               wfGetDB( DB_MASTER )->insert( 'change_tag', $changeTags );
+       }
+
+       /**
+        * Insert change_tag_def rows whose ctd_count is (partly) wrong, so the
+        * tests can verify that the script corrects it.
+        */
+       private function insertStaleChangeTagDefs() {
+               wfGetDB( DB_MASTER )->insert(
+                       'change_tag_def',
+                       [
+                               [
+                                       'ctd_name' => 'One Tag',
+                                       'ctd_user_defined' => 0,
+                                       'ctd_count' => 50,
+                               ],
+                               [
+                                       'ctd_name' => 'Two Tags',
+                                       'ctd_user_defined' => 0,
+                                       'ctd_count' => 4,
+                               ],
+                               [
+                                       'ctd_name' => 'Three Tags',
+                                       'ctd_user_defined' => 0,
+                                       'ctd_count' => 3,
+                               ],
+                       ],
+                       __METHOD__
+               );
+       }
+
+       /**
+        * Assert that change_tag_def holds exactly one row per fixture tag with
+        * the count matching the change_tag fixture.
+        */
+       private function assertChangeTagDefCountsAreAccurate() {
+               $expected = [
+                       (object)[
+                               'ctd_name' => 'One Tag',
+                               'ctd_count' => 1,
+                       ],
+                       (object)[
+                               'ctd_name' => 'Two Tags',
+                               'ctd_count' => 2,
+                       ],
+                       (object)[
+                               'ctd_name' => 'Three Tags',
+                               'ctd_count' => 3,
+                       ],
+               ];
+
+               $actual = wfGetDB( DB_REPLICA )->select(
+                       [ 'change_tag_def' ],
+                       [ 'ctd_name', 'ctd_count' ],
+                       [],
+                       __METHOD__,
+                       [ 'ORDER BY' => 'ctd_count' ]
+               );
+
+               $this->assertEquals( $expected, iterator_to_array( $actual, false ) );
+       }
+
+       /**
+        * Assert that a dry run left change_tag_def empty and every
+        * change_tag.ct_tag_id unset.
+        */
+       private function assertDryRunChangedNothing() {
+               $actualChangeTagDefs = wfGetDB( DB_REPLICA )->select(
+                       [ 'change_tag_def' ],
+                       [ 'ctd_id', 'ctd_name' ],
+                       [],
+                       __METHOD__
+               );
+
+               $this->assertEquals( [], iterator_to_array( $actualChangeTagDefs, false ) );
+
+               $actualChangeTags = wfGetDB( DB_REPLICA )->select(
+                       [ 'change_tag' ],
+                       [ 'ct_tag_id', 'ct_tag' ],
+                       [],
+                       __METHOD__
+               );
+
+               foreach ( $actualChangeTags as $row ) {
+                       $this->assertNull( $row->ct_tag_id );
+                       $this->assertNotNull( $row->ct_tag );
+               }
+       }
+
+       public function testRun() {
+               $this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_WRITE_BOTH );
+               $this->maintenance->loadWithArgv( [ '--sleep', '0' ] );
+
+               $this->maintenance->execute();
+
+               $this->assertChangeTagDefCountsAreAccurate();
+
+               // Check if change_tag is also backpopulated
+               $actualChangeTags = wfGetDB( DB_REPLICA )->select(
+                       [ 'change_tag', 'change_tag_def' ],
+                       [ 'ct_tag', 'ct_tag_id', 'ctd_count' ],
+                       [],
+                       __METHOD__,
+                       [],
+                       [ 'change_tag_def' => [ 'LEFT JOIN', 'ct_tag_id=ctd_id' ] ]
+               );
+               // Tag name => count expected on the definition the row points to
+               $expectedCounts = [
+                       'One Tag' => 1,
+                       'Two Tags' => 2,
+                       'Three Tags' => 3
+               ];
+               foreach ( $actualChangeTags as $row ) {
+                       $this->assertNotNull( $row->ct_tag_id );
+                       $this->assertEquals( $expectedCounts[$row->ct_tag], $row->ctd_count );
+               }
+       }
+
+       public function testRunUpdateHitCountMigrationNew() {
+               $this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_NEW );
+               $this->insertStaleChangeTagDefs();
+
+               // Link the change_tag rows to their definitions. The ids are read back
+               // rather than assumed to be 1..3, so the test does not depend on the
+               // auto-increment counter having been reset.
+               $dbw = wfGetDB( DB_MASTER );
+               foreach ( [ 'One Tag', 'Two Tags', 'Three Tags' ] as $tagName ) {
+                       $tagId = $dbw->selectField(
+                               'change_tag_def',
+                               'ctd_id',
+                               [ 'ctd_name' => $tagName ],
+                               __METHOD__
+                       );
+                       $dbw->update(
+                               'change_tag',
+                               [ 'ct_tag_id' => $tagId ],
+                               [ 'ct_tag' => $tagName ],
+                               __METHOD__
+                       );
+               }
+
+               $this->maintenance->loadWithArgv( [ '--sleep', '0' ] );
+
+               $this->maintenance->execute();
+
+               $this->assertChangeTagDefCountsAreAccurate();
+       }
+
+       public function testRunUpdateHitCountMigrationWriteBoth() {
+               $this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_WRITE_BOTH );
+               $this->insertStaleChangeTagDefs();
+
+               $this->maintenance->loadWithArgv( [ '--sleep', '0' ] );
+
+               $this->maintenance->execute();
+
+               $this->assertChangeTagDefCountsAreAccurate();
+       }
+
+       public function testDryRunMigrationNew() {
+               $this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_NEW );
+               $this->maintenance->loadWithArgv( [ '--dry-run', '--sleep', '0' ] );
+
+               $this->maintenance->execute();
+
+               $this->assertDryRunChangedNothing();
+       }
+
+       public function testDryRunMigrationWriteBoth() {
+               $this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_WRITE_BOTH );
+               $this->maintenance->loadWithArgv( [ '--dry-run', '--sleep', '0' ] );
+
+               $this->maintenance->execute();
+
+               $this->assertDryRunChangedNothing();
+       }
+
+}