Migrate ar_text to modern storage
author Brad Jorsch <bjorsch@wikimedia.org>
Tue, 28 Nov 2017 21:13:09 +0000 (16:13 -0500)
committer Brad Jorsch <bjorsch@wikimedia.org>
Tue, 9 Jan 2018 21:15:44 +0000 (16:15 -0500)
This really should have been done a very long time ago.

This adds a maintenance script to migrate rows with ar_text_id null to
modern storage, either the text table or ExternalStore.

Bug: T36925
Change-Id: I5608c6b6d3ecad516b785e13d668427c1b762e41

autoload.php
includes/Storage/SqlBlobStore.php
includes/installer/DatabaseUpdater.php
includes/installer/MssqlUpdater.php
includes/installer/MysqlUpdater.php
includes/installer/OracleUpdater.php
includes/installer/PostgresUpdater.php
includes/installer/SqliteUpdater.php
maintenance/migrateArchiveText.php [new file with mode: 0644]

index 4a50894..5d6104c 100644 (file)
@@ -1011,6 +1011,7 @@ $wgAutoloadLocalClasses = [
        'MessageContent' => __DIR__ . '/includes/content/MessageContent.php',
        'MessageLocalizer' => __DIR__ . '/languages/MessageLocalizer.php',
        'MessageSpecifier' => __DIR__ . '/includes/libs/MessageSpecifier.php',
+       'MigrateArchiveText' => __DIR__ . '/maintenance/migrateArchiveText.php',
        'MigrateComments' => __DIR__ . '/maintenance/migrateComments.php',
        'MigrateFileRepoLayout' => __DIR__ . '/maintenance/migrateFileRepoLayout.php',
        'MigrateUserGroup' => __DIR__ . '/maintenance/migrateUserGroup.php',
index 69e1539..5ddbd34 100644 (file)
@@ -466,6 +466,11 @@ class SqlBlobStore implements IDBAccessObject, BlobStore {
                        return false;
                }
 
+               if ( in_array( 'error', $blobFlags ) ) {
+                       // Error row, return false
+                       return false;
+               }
+
                if ( in_array( 'gzip', $blobFlags ) ) {
                        # Deal with optional compression of archived pages.
                        # This can be done periodically via maintenance/compressOld.php, and
index 242f148..176d0af 100644 (file)
@@ -1230,4 +1230,15 @@ abstract class DatabaseUpdater {
                }
        }
 
+       /**
+        * Run the MigrateArchiveText maintenance script as a child task to
+        * migrate ar_text to modern storage.
+        *
+        * A progress message is written before and after the child runs; the
+        * value returned by the child's execute() is not inspected here.
+        *
+        * @since 1.31
+        */
+       protected function migrateArchiveText() {
+               $this->output( "Migrating archive ar_text to modern storage.\n" );
+               $this->maintenance
+                       ->runChild( 'MigrateArchiveText', 'migrateArchiveText.php' )
+                       ->execute();
+               $this->output( "done.\n" );
+       }
+
 }
index cb7a6ba..b4b34de 100644 (file)
@@ -111,6 +111,7 @@ class MssqlUpdater extends DatabaseUpdater {
                        [ 'addTable', 'content', 'patch-content.sql' ],
                        [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ],
                        [ 'addTable', 'content_models', 'patch-content_models.sql' ],
+                       [ 'migrateArchiveText' ],
                ];
        }
 
index bc7725e..a3caa07 100644 (file)
@@ -335,6 +335,7 @@ class MysqlUpdater extends DatabaseUpdater {
                        [ 'addTable', 'content', 'patch-content.sql' ],
                        [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ],
                        [ 'addTable', 'content_models', 'patch-content_models.sql' ],
+                       [ 'migrateArchiveText' ],
                ];
        }
 
index 67150ee..ea68412 100644 (file)
@@ -132,6 +132,7 @@ class OracleUpdater extends DatabaseUpdater {
                        [ 'addTable', 'content', 'patch-content.sql' ],
                        [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ],
                        [ 'addTable', 'content_models', 'patch-content_models.sql' ],
+                       [ 'migrateArchiveText' ],
 
                        // KEEP THIS AT THE BOTTOM!!
                        [ 'doRebuildDuplicateFunction' ],
index fe8a1b1..367d431 100644 (file)
@@ -489,6 +489,7 @@ class PostgresUpdater extends DatabaseUpdater {
                        [ 'addTable', 'content', 'patch-content-table.sql' ],
                        [ 'addTable', 'content_models', 'patch-content_models-table.sql' ],
                        [ 'addTable', 'slot_roles', 'patch-slot_roles-table.sql' ],
+                       [ 'migrateArchiveText' ],
                ];
        }
 
index 88dfa6c..afb8b22 100644 (file)
@@ -198,7 +198,8 @@ class SqliteUpdater extends DatabaseUpdater {
                        [ 'addTable', 'content', 'patch-content.sql' ],
                        [ 'addTable', 'content_models', 'patch-content_models.sql' ],
                        [ 'addTable', 'slots', 'patch-slots.sql' ],
-                       [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ]
+                       [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ],
+                       [ 'migrateArchiveText' ],
                ];
        }
 
diff --git a/maintenance/migrateArchiveText.php b/maintenance/migrateArchiveText.php
new file mode 100644 (file)
index 0000000..dd78a7d
--- /dev/null
@@ -0,0 +1,159 @@
+<?php
+/**
+ * Migrate archive.ar_text and ar_flags to modern storage
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that migrates archive.ar_text and ar_flags to modern storage
+ *
+ * Rows of the archive table whose ar_text_id is NULL still carry their text
+ * inline in ar_text/ar_flags. For each such row this script inserts a row
+ * into the text table (first handing the blob to the default ExternalStore
+ * when $wgDefaultExternalStore is set), points ar_text_id at the new row and
+ * blanks ar_text and ar_flags.
+ *
+ * @ingroup Maintenance
+ * @since 1.31
+ */
+class MigrateArchiveText extends LoggedUpdateMaintenance {
+       /**
+        * Register the script description and the --replace-missing option.
+        */
+       public function __construct() {
+               parent::__construct();
+               // This script moves archived revision text, not comments.
+               $this->addDescription(
+                       'Migrates content from pre-1.5 ar_text and ar_flags columns to modern storage'
+               );
+               $this->addOption(
+                       'replace-missing',
+                       "For rows with missing or unloadable data, throw away whatever is there and\n"
+                       . "mark them as \"error\" in the database."
+               );
+       }
+
+       /**
+        * Sets whether a run of this maintenance script has the force parameter set
+        * @param bool $forced
+        */
+       public function setForce( $forced = true ) {
+               $this->mOptions['force'] = $forced;
+       }
+
+       /**
+        * @return string The class name, used as the key for this update
+        */
+       protected function getUpdateKey() {
+               return __CLASS__;
+       }
+
+       /**
+        * Walk the archive table in ar_id order and move every row that has no
+        * ar_text_id into the text table.
+        *
+        * Rows whose text cannot be loaded are skipped and counted as errors,
+        * unless --replace-missing was given, in which case a placeholder text
+        * flagged "error" is stored for them instead.
+        *
+        * @return bool True if no row had to be skipped for missing data
+        * @throws MWException If ExternalStore::insertToDefault() returns a falsy value
+        */
+       protected function doDBUpdates() {
+               global $wgDefaultExternalStore;
+
+               $replaceMissing = $this->hasOption( 'replace-missing' );
+               $batchSize = $this->getBatchSize();
+
+               $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
+               $dbw = $this->getDB( DB_MASTER );
+               // Rows are read from the replica and written on the master, so the
+               // column has to exist on both connections.
+               if ( !$dbr->fieldExists( 'archive', 'ar_text', __METHOD__ ) ||
+                       !$dbw->fieldExists( 'archive', 'ar_text', __METHOD__ )
+               ) {
+                       $this->output( "No ar_text field, so nothing to migrate.\n" );
+                       return true;
+               }
+
+               $this->output( "Migrating ar_text to modern storage...\n" );
+               // Highest ar_id seen so far; each batch resumes after it.
+               $last = 0;
+               $count = 0;
+               $errors = 0;
+               while ( true ) {
+                       $res = $dbr->select(
+                               'archive',
+                               [ 'ar_id', 'ar_text', 'ar_flags' ],
+                               [
+                                       'ar_text_id' => null,
+                                       "ar_id > $last",
+                               ],
+                               __METHOD__,
+                               [ 'LIMIT' => $batchSize, 'ORDER BY' => [ 'ar_id' ] ]
+                       );
+                       $numRows = $res->numRows();
+
+                       foreach ( $res as $row ) {
+                               // Advance the cursor before anything else so that a row
+                               // skipped via "continue" below is not selected again.
+                               $last = $row->ar_id;
+
+                               // Recompress the text (and store in external storage, if
+                               // applicable) if it's not already in external storage.
+                               if ( !in_array( 'external', explode( ',', $row->ar_flags ), true ) ) {
+                                       $data = Revision::getRevisionText( $row, 'ar_' );
+                                       if ( $data !== false ) {
+                                               // NOTE(review): relies on compressRevisionText() updating
+                                               // $data in place and returning the matching flags string.
+                                               $flags = Revision::compressRevisionText( $data );
+
+                                               if ( $wgDefaultExternalStore ) {
+                                                       // From here on $data holds what insertToDefault()
+                                                       // returned, not the text itself.
+                                                       $data = ExternalStore::insertToDefault( $data );
+                                                       if ( !$data ) {
+                                                               throw new MWException( "Unable to store text to external storage" );
+                                                       }
+                                                       if ( $flags ) {
+                                                               $flags .= ',';
+                                                       }
+                                                       $flags .= 'external';
+                                               }
+                                       } elseif ( $replaceMissing ) {
+                                               $this->error( "Replacing missing data for row ar_id=$row->ar_id" );
+                                               $data = 'Missing data in migrateArchiveText.php on ' . date( 'c' );
+                                               $flags = 'error';
+                                       } else {
+                                               $this->error( "No data for row ar_id=$row->ar_id" );
+                                               $errors++;
+                                               continue;
+                                       }
+                               } else {
+                                       // Already flagged as external: carry the stored value and
+                                       // its flags over to the text table unchanged.
+                                       $flags = $row->ar_flags;
+                                       $data = $row->ar_text;
+                               }
+
+                               // Insert the text row and repoint the archive row in one
+                               // transaction.
+                               $this->beginTransaction( $dbw, __METHOD__ );
+                               $dbw->insert(
+                                       'text',
+                                       [ 'old_text' => $data, 'old_flags' => $flags ],
+                                       __METHOD__
+                               );
+                               $id = $dbw->insertId();
+                               // The "ar_text_id IS NULL" condition keeps a row that was
+                               // migrated in the meantime from being overwritten, so
+                               // affectedRows() only counts rows actually changed here.
+                               $dbw->update(
+                                       'archive',
+                                       [ 'ar_text_id' => $id, 'ar_text' => '', 'ar_flags' => '' ],
+                                       [ 'ar_id' => $row->ar_id, 'ar_text_id' => null ],
+                                       __METHOD__
+                               );
+                               $count += $dbw->affectedRows();
+                               $this->commitTransaction( $dbw, __METHOD__ );
+                       }
+
+                       if ( $numRows < $batchSize ) {
+                               // We must have reached the end
+                               break;
+                       }
+
+                       $this->output( "... $last\n" );
+                       // $this->commitTransaction() already waited for replicas, no need to re-wait here.
+               }
+
+               $this->output( "Completed ar_text migration, $count rows updated, $errors missing data.\n" );
+               if ( $errors ) {
+                       $this->output( "Run with --replace-missing to overwrite missing data with an error message.\n" );
+               }
+
+               return $errors === 0;
+       }
+}
+
+// Name the class this script provides, then include the runner file whose
+// path is held in the RUN_MAINTENANCE_IF_MAIN constant.
+$maintClass = MigrateArchiveText::class;
+require_once RUN_MAINTENANCE_IF_MAIN;