Populate ar_rev_id and make it non-nullable
authorBrad Jorsch <bjorsch@wikimedia.org>
Thu, 4 Jan 2018 19:35:13 +0000 (14:35 -0500)
committerBrad Jorsch <bjorsch@wikimedia.org>
Mon, 2 Apr 2018 17:08:55 +0000 (13:08 -0400)
Revisions deleted before MediaWiki 1.5 do not have a value in this
field. This is going to be a problem for migration to the MCR schema, so
provide a maintenance script to clean this up.

Then, for good measure, change the schema to make the field
non-nullable.

Bug: T182678
Change-Id: Ie2e11f12a30f379db32c3e074658012c6f93adb0

17 files changed:
autoload.php
includes/installer/DatabaseUpdater.php
includes/installer/MssqlUpdater.php
includes/installer/MysqlUpdater.php
includes/installer/OracleUpdater.php
includes/installer/PostgresUpdater.php
includes/installer/SqliteUpdater.php
maintenance/archives/patch-ar_rev_id-not-null.sql [new file with mode: 0644]
maintenance/mssql/archives/patch-ar_rev_id-not-null.sql [new file with mode: 0644]
maintenance/mssql/tables.sql
maintenance/oracle/archives/patch-ar_rev_id-not-null.sql [new file with mode: 0644]
maintenance/oracle/tables.sql
maintenance/populateArchiveRevId.php [new file with mode: 0644]
maintenance/postgres/archives/patch-ar_rev_id-not-null.sql [new file with mode: 0644]
maintenance/postgres/tables.sql
maintenance/sqlite/archives/patch-ar_rev_id-not-null.sql [new file with mode: 0644]
maintenance/tables.sql

index 126362c..15c9ed2 100644 (file)
@@ -1159,6 +1159,7 @@ $wgAutoloadLocalClasses = [
        'PoolCounterWorkViaCallback' => __DIR__ . '/includes/poolcounter/PoolCounterWorkViaCallback.php',
        'PoolCounter_Stub' => __DIR__ . '/includes/poolcounter/PoolCounter.php',
        'PoolWorkArticleView' => __DIR__ . '/includes/poolcounter/PoolWorkArticleView.php',
+       'PopulateArchiveRevId' => __DIR__ . '/maintenance/populateArchiveRevId.php',
        'PopulateBacklinkNamespace' => __DIR__ . '/maintenance/populateBacklinkNamespace.php',
        'PopulateCategory' => __DIR__ . '/maintenance/populateCategory.php',
        'PopulateContentModel' => __DIR__ . '/maintenance/populateContentModel.php',
index 0d55454..7a1aba6 100644 (file)
@@ -1263,4 +1263,23 @@ abstract class DatabaseUpdater {
                $this->output( "done.\n" );
        }
 
+       /**
+        * Populate ar_rev_id, then make it not nullable
+        * @since 1.31
+        */
+        protected function populateArchiveRevId() {
+                $info = $this->db->fieldInfo( 'archive', 'ar_rev_id', __METHOD__ );
+                if ( !$info ) {
+                        throw new MWException( 'Missing ar_rev_id field of archive table. Should not happen.' );
+                }
+                if ( $info->isNullable() ) {
+                        $this->output( "Populating ar_rev_id.\n" );
+                        $task = $this->maintenance->runChild( 'PopulateArchiveRevId', 'populateArchiveRevId.php' );
+                        if ( $task->execute() ) {
+                                $this->applyPatch( 'patch-ar_rev_id-not-null.sql', false,
+                                        'Making ar_rev_id not nullable' );
+                        }
+                }
+        }
+
 }
index 694cd29..3a965d4 100644 (file)
@@ -121,6 +121,7 @@ class MssqlUpdater extends DatabaseUpdater {
                        [ 'migrateActors' ],
                        [ 'modifyField', 'revision', 'rev_text_id', 'patch-rev_text_id-default.sql' ],
                        [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ],
+                       [ 'populateArchiveRevId' ],
                ];
        }
 
index 9427596..caa9a67 100644 (file)
@@ -341,6 +341,7 @@ class MysqlUpdater extends DatabaseUpdater {
                        [ 'migrateActors' ],
                        [ 'modifyField', 'revision', 'rev_text_id', 'patch-rev_text_id-default.sql' ],
                        [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ],
+                       [ 'populateArchiveRevId' ],
                ];
        }
 
index cb0399c..6a1d220 100644 (file)
@@ -141,6 +141,7 @@ class OracleUpdater extends DatabaseUpdater {
                        [ 'addTable', 'actor', 'patch-actor-table.sql' ],
                        [ 'migrateActors' ],
                        [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ],
+                       [ 'populateArchiveRevId' ],
 
                        // KEEP THIS AT THE BOTTOM!!
                        [ 'doRebuildDuplicateFunction' ],
index c9a5086..45bfa22 100644 (file)
@@ -534,6 +534,7 @@ class PostgresUpdater extends DatabaseUpdater {
                        [ 'addPgIndex', 'logging', 'logging_actor_time', '( log_actor, log_timestamp )' ],
                        [ 'migrateActors' ],
                        [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ],
+                       [ 'populateArchiveRevId' ],
                ];
        }
 
index 7a2cc0d..09a52bb 100644 (file)
@@ -205,6 +205,7 @@ class SqliteUpdater extends DatabaseUpdater {
                        [ 'migrateActors' ],
                        [ 'modifyField', 'revision', 'rev_text_id', 'patch-rev_text_id-default.sql' ],
                        [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ],
+                       [ 'populateArchiveRevId' ],
                ];
        }
 
diff --git a/maintenance/archives/patch-ar_rev_id-not-null.sql b/maintenance/archives/patch-ar_rev_id-not-null.sql
new file mode 100644 (file)
index 0000000..8418f20
--- /dev/null
@@ -0,0 +1,3 @@
+-- T182678: Make ar_rev_id not nullable
+ALTER TABLE /*_*/archive
+  CHANGE COLUMN ar_rev_id ar_rev_id int unsigned NOT NULL;
diff --git a/maintenance/mssql/archives/patch-ar_rev_id-not-null.sql b/maintenance/mssql/archives/patch-ar_rev_id-not-null.sql
new file mode 100644 (file)
index 0000000..d287f49
--- /dev/null
@@ -0,0 +1 @@
+ALTER TABLE /*_*/archive ALTER COLUMN ar_rev_id INT NOT NULL;
index 977666d..50a5b9f 100644 (file)
@@ -279,7 +279,7 @@ CREATE TABLE /*_*/archive (
    ar_timestamp varchar(14) NOT NULL default '',
    ar_minor_edit BIT NOT NULL DEFAULT 0,
    ar_flags NVARCHAR(255) NOT NULL,
-   ar_rev_id INT NULL, -- NOT a FK, the row gets deleted from revision and moved here
+   ar_rev_id INT NOT NULL, -- NOT a FK, the row gets deleted from revision and moved here
    ar_text_id INT CONSTRAINT ar_text_id__old_id__fk FOREIGN KEY REFERENCES /*_*/text(old_id) ON DELETE CASCADE,
    ar_deleted TINYINT NOT NULL DEFAULT 0,
    ar_len INT,
diff --git a/maintenance/oracle/archives/patch-ar_rev_id-not-null.sql b/maintenance/oracle/archives/patch-ar_rev_id-not-null.sql
new file mode 100644 (file)
index 0000000..56f1598
--- /dev/null
@@ -0,0 +1,5 @@
+-- T182678: Make ar_rev_id not nullable
+
+define mw_prefix='{$wgDBprefix}';
+
+ALTER TABLE &mw_prefix.archive MODIFY ar_rev_id NUMBER NOT NULL;
index 8551fb9..70a4743 100644 (file)
@@ -257,7 +257,7 @@ CREATE TABLE &mw_prefix.archive (
   ar_timestamp   TIMESTAMP(6) WITH TIME ZONE  NOT NULL,
   ar_minor_edit  CHAR(1)         DEFAULT '0' NOT NULL,
   ar_flags       VARCHAR2(255),
-  ar_rev_id      NUMBER,
+  ar_rev_id      NUMBER NOT NULL,
   ar_text_id     NUMBER,
   ar_deleted     CHAR(1)      DEFAULT '0' NOT NULL,
   ar_len         NUMBER,
diff --git a/maintenance/populateArchiveRevId.php b/maintenance/populateArchiveRevId.php
new file mode 100644 (file)
index 0000000..b8b9e68
--- /dev/null
@@ -0,0 +1,177 @@
+<?php
+/**
+ * Populate ar_rev_id in pre-1.5 rows
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+use Wikimedia\Rdbms\IDatabase;
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that populares archive.ar_rev_id in old rows
+ *
+ * @ingroup Maintenance
+ * @since 1.31
+ */
+class PopulateArchiveRevId extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->addDescription( 'Populate ar_rev_id in pre-1.5 rows' );
+               $this->setBatchSize( 100 );
+       }
+
+       protected function getUpdateKey() {
+               return __CLASS__;
+       }
+
+       protected function doDBUpdates() {
+               $this->output( "Populating ar_rev_id...\n" );
+               $dbw = $this->getDB( DB_MASTER );
+
+               // Quick exit if there are no rows needing updates.
+               $any = $dbw->selectField(
+                       'archive',
+                       'ar_id',
+                       [ 'ar_rev_id' => null ],
+                       __METHOD__
+               );
+               if ( !$any ) {
+                       $this->output( "Completed ar_rev_id population, 0 rows updated.\n" );
+                       return true;
+               }
+
+               $rev = $this->makeDummyRevisionRow( $dbw );
+               $count = 0;
+               while ( true ) {
+                       wfWaitForSlaves();
+
+                       $arIds = $dbw->selectFieldValues(
+                               'archive',
+                               'ar_id',
+                               [ 'ar_rev_id' => null ],
+                               __METHOD__,
+                               [ 'LIMIT' => $this->getBatchSize(), 'ORDER BY' => [ 'ar_id' ] ]
+                       );
+                       if ( !$arIds ) {
+                               $this->output( "Completed ar_rev_id population, $count rows updated.\n" );
+                               return true;
+                       }
+
+                       try {
+                               $updates = $dbw->doAtomicSection( __METHOD__, function ( $dbw, $fname ) use ( $arIds, $rev ) {
+                                       // Create new rev_ids by inserting dummy rows into revision and then deleting them.
+                                       $dbw->insert( 'revision', array_fill( 0, count( $arIds ), $rev ), $fname );
+                                       $revIds = $dbw->selectFieldValues(
+                                               'revision',
+                                               'rev_id',
+                                               [ 'rev_timestamp' => $rev['rev_timestamp'] ],
+                                               $fname
+                                       );
+                                       if ( !is_array( $revIds ) ) {
+                                               throw new UnexpectedValueException( 'Failed to insert dummy revisions' );
+                                       }
+                                       if ( count( $revIds ) !== count( $arIds ) ) {
+                                               throw new UnexpectedValueException(
+                                                       'Tried to insert ' . count( $arIds ) . ' dummy revisions, but found '
+                                                       . count( $revIds ) . ' matching rows.'
+                                               );
+                                       }
+                                       $dbw->delete( 'revision', [ 'rev_id' => $revIds ], $fname );
+
+                                       return array_combine( $arIds, $revIds );
+                               } );
+                       } catch ( UnexpectedValueException $ex ) {
+                               $this->fatalError( $ex->getMessage() );
+                       }
+
+                       foreach ( $updates as $arId => $revId ) {
+                               $dbw->update(
+                                       'archive',
+                                       [ 'ar_rev_id' => $revId ],
+                                       [ 'ar_id' => $arId, 'ar_rev_id' => null ],
+                                       __METHOD__
+                               );
+                               $count += $dbw->affectedRows();
+                       }
+
+                       $min = min( array_keys( $updates ) );
+                       $max = max( array_keys( $updates ) );
+                       $this->output( " ... $min-$max\n" );
+               }
+       }
+
+       /**
+        * Construct a dummy revision table row to use for reserving IDs
+        *
+        * The row will have a wildly unlikely timestamp, and possibly a generic
+        * user and comment, but will otherwise be derived from a revision on the
+        * wiki's main page.
+        *
+        * @param IDatabase $dbw
+        * @return array
+        */
+       private function makeDummyRevisionRow( IDatabase $dbw ) {
+               $ts = $dbw->timestamp( '11111111111111' );
+               $mainPage = Title::newMainPage();
+               if ( !$mainPage ) {
+                       $this->fatalError( 'Main page does not exist' );
+               }
+               $pageId = $mainPage->getArticleId();
+               if ( !$pageId ) {
+                       $this->fatalError( $mainPage->getPrefixedText() . ' has no ID' );
+               }
+               $rev = $dbw->selectRow(
+                       'revision',
+                       '*',
+                       [ 'rev_page' => $pageId ],
+                       __METHOD__,
+                       [ 'ORDER BY' => 'rev_timestamp ASC' ]
+               );
+               if ( !$rev ) {
+                       $this->fatalError( $mainPage->getPrefixedText() . ' has no revisions' );
+               }
+               unset( $rev->rev_id );
+               $rev = (array)$rev;
+               $rev['rev_timestamp'] = $ts;
+               if ( isset( $rev['rev_user'] ) ) {
+                       $rev['rev_user'] = 0;
+                       $rev['rev_user_text'] = '0.0.0.0';
+               }
+               if ( isset( $rev['rev_comment'] ) ) {
+                       $rev['rev_comment'] = 'Dummy row';
+               }
+
+               $any = $dbw->selectField(
+                       'revision',
+                       'rev_id',
+                       [ 'rev_timestamp' => $ts ],
+                       __METHOD__
+               );
+               if ( $any ) {
+                       $this->fatalError( "... Why does your database contain a revision dated $ts?" );
+               }
+
+               return $rev;
+       }
+}
+
+$maintClass = "PopulateArchiveRevId";
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/postgres/archives/patch-ar_rev_id-not-null.sql b/maintenance/postgres/archives/patch-ar_rev_id-not-null.sql
new file mode 100644 (file)
index 0000000..1a090e9
--- /dev/null
@@ -0,0 +1,3 @@
+-- T182678: Make ar_rev_id not nullable
+ALTER TABLE archive
+  ALTER COLUMN ar_rev_id SET NOT NULL;
index 9a5420a..bf89aed 100644 (file)
@@ -248,7 +248,7 @@ CREATE TABLE archive (
   ar_timestamp      TIMESTAMPTZ  NOT NULL,
   ar_minor_edit     SMALLINT     NOT NULL  DEFAULT 0,
   ar_flags          TEXT,
-  ar_rev_id         INTEGER,
+  ar_rev_id         INTEGER      NOT NULL,
   ar_text_id        INTEGER,
   ar_deleted        SMALLINT     NOT NULL  DEFAULT 0,
   ar_len            INTEGER          NULL,
diff --git a/maintenance/sqlite/archives/patch-ar_rev_id-not-null.sql b/maintenance/sqlite/archives/patch-ar_rev_id-not-null.sql
new file mode 100644 (file)
index 0000000..3df205f
--- /dev/null
@@ -0,0 +1,49 @@
+-- T182678: Make ar_rev_id not nullable
+
+BEGIN;
+
+DROP TABLE IF EXISTS /*_*/archive_tmp;
+CREATE TABLE /*_*/archive_tmp (
+  ar_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+  ar_namespace int NOT NULL default 0,
+  ar_title varchar(255) binary NOT NULL default '',
+  ar_text mediumblob NOT NULL,
+  ar_comment varbinary(767) NOT NULL default '',
+  ar_comment_id bigint unsigned NOT NULL DEFAULT 0,
+  ar_user int unsigned NOT NULL default 0,
+  ar_user_text varchar(255) binary NOT NULL DEFAULT '',
+  ar_actor bigint unsigned NOT NULL DEFAULT 0,
+  ar_timestamp binary(14) NOT NULL default '',
+  ar_minor_edit tinyint NOT NULL default 0,
+  ar_flags tinyblob NOT NULL,
+  ar_rev_id int unsigned NOT NULL,
+  ar_text_id int unsigned,
+  ar_deleted tinyint unsigned NOT NULL default 0,
+  ar_len int unsigned,
+  ar_page_id int unsigned,
+  ar_parent_id int unsigned default NULL,
+  ar_sha1 varbinary(32) NOT NULL default '',
+  ar_content_model varbinary(32) DEFAULT NULL,
+  ar_content_format varbinary(64) DEFAULT NULL
+) /*$wgDBTableOptions*/;
+
+INSERT OR IGNORE INTO /*_*/archive_tmp (
+       ar_id, ar_namespace, ar_title, ar_text, ar_comment, ar_comment_id, ar_user,
+       ar_user_text, ar_actor, ar_timestamp, ar_minor_edit, ar_flags, ar_rev_id,
+       ar_text_id, ar_deleted, ar_len, ar_page_id, ar_parent_id, ar_sha1,
+       ar_content_model, ar_content_format)
+  SELECT
+       ar_id, ar_namespace, ar_title, ar_text, ar_comment, ar_comment_id, ar_user,
+       ar_user_text, ar_actor, ar_timestamp, ar_minor_edit, ar_flags, ar_rev_id,
+       ar_text_id, ar_deleted, ar_len, ar_page_id, ar_parent_id, ar_sha1,
+       ar_content_model, ar_content_format
+  FROM /*_*/archive;
+
+DROP TABLE /*_*/archive;
+ALTER TABLE /*_*/archive_tmp RENAME TO /*_*/archive;
+CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp);
+CREATE INDEX /*i*/ar_usertext_timestamp ON /*_*/archive (ar_user_text,ar_timestamp);
+CREATE INDEX /*i*/ar_actor_timestamp ON /*_*/archive (ar_actor,ar_timestamp);
+CREATE INDEX /*i*/ar_revid ON /*_*/archive (ar_rev_id);
+
+COMMIT;
index 0c73f29..bd1b628 100644 (file)
@@ -618,7 +618,7 @@ CREATE TABLE /*_*/archive (
   --
   -- @since 1.5 Entries from 1.4 will be NULL here. When restoring
   -- archive rows from before 1.5, a new rev_id is created.
-  ar_rev_id int unsigned,
+  ar_rev_id int unsigned NOT NULL,
 
   -- Copied from rev_text_id, references text.old_id.
   -- To avoid breaking the block-compression scheme and otherwise making