Reinstated r94289 et al. - rev_sha1/ar_sha1 fields for bug 21860
authorAaron Schulz <aaron@users.mediawiki.org>
Thu, 27 Oct 2011 18:44:10 +0000 (18:44 +0000)
committerAaron Schulz <aaron@users.mediawiki.org>
Thu, 27 Oct 2011 18:44:10 +0000 (18:44 +0000)
includes/AutoLoader.php
includes/Revision.php
includes/WikiPage.php
includes/installer/DatabaseUpdater.php
includes/installer/MysqlUpdater.php
includes/installer/SqliteUpdater.php
includes/specials/SpecialUndelete.php
maintenance/archives/patch-ar_sha1.sql [new file with mode: 0644]
maintenance/archives/patch-rev_sha1.sql [new file with mode: 0644]
maintenance/populateRevisionSha1.php [new file with mode: 0644]
maintenance/tables.sql

index d5b20fc..2f07212 100644 (file)
@@ -865,6 +865,7 @@ $wgAutoloadLocalClasses = array(
        'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php',
        'PopulateParentId' => 'maintenance/populateParentId.php',
        'PopulateRevisionLength' => 'maintenance/populateRevisionLength.php',
+       'PopulateRevisionSha1' => 'maintenance/populateRevisionSha1.php',
        'SevenZipStream' => 'maintenance/7zip.inc',
        'Sqlite' => 'maintenance/sqlite.inc',
        'UpdateCollation' => 'maintenance/updateCollation.php',
index ec0508c..f75442c 100644 (file)
@@ -13,6 +13,7 @@ class Revision {
        protected $mTimestamp;
        protected $mDeleted;
        protected $mSize;
+       protected $mSha1;
        protected $mParentId;
        protected $mComment;
        protected $mText;
@@ -122,7 +123,8 @@ class Revision {
                        'minor_edit' => $row->ar_minor_edit,
                        'text_id'    => isset( $row->ar_text_id ) ? $row->ar_text_id : null,
                        'deleted'    => $row->ar_deleted,
-                       'len'        => $row->ar_len
+                       'len'        => $row->ar_len,
+                       'sha1'       => $row->ar_sha1
                );
                if ( isset( $row->ar_text ) && !$row->ar_text_id ) {
                        // Pre-1.5 ar_text row
@@ -313,7 +315,8 @@ class Revision {
                        'rev_minor_edit',
                        'rev_deleted',
                        'rev_len',
-                       'rev_parent_id'
+                       'rev_parent_id',
+                       'rev_sha1'
                );
        }
 
@@ -375,6 +378,12 @@ class Revision {
                                $this->mSize = intval( $row->rev_len );
                        }
 
+                       if ( !isset( $row->rev_sha1 ) ) {
+                               $this->mSha1 = null;
+                       } else {
+                               $this->mSha1 = $row->rev_sha1;
+                       }
+
                        if( isset( $row->page_latest ) ) {
                                $this->mCurrent = ( $row->rev_id == $row->page_latest );
                                $this->mTitle = Title::newFromRow( $row );
@@ -402,7 +411,7 @@ class Revision {
                        $this->mOrigUserText = $row->rev_user_text;
                } elseif( is_array( $row ) ) {
                        // Build a new revision to be saved...
-                       global $wgUser;
+                       global $wgUser; // ugh
 
                        $this->mId        = isset( $row['id']         ) ? intval( $row['id']         ) : null;
                        $this->mPage      = isset( $row['page']       ) ? intval( $row['page']       ) : null;
@@ -414,6 +423,7 @@ class Revision {
                        $this->mDeleted   = isset( $row['deleted']    ) ? intval( $row['deleted']    ) : 0;
                        $this->mSize      = isset( $row['len']        ) ? intval( $row['len']        ) : null;
                        $this->mParentId  = isset( $row['parent_id']  ) ? intval( $row['parent_id']  ) : null;
+                       $this->mSha1      = isset( $row['sha1']  )      ? strval( $row['sha1']  )      : null;
 
                        // Enforce spacing trimming on supplied text
                        $this->mComment   = isset( $row['comment']    ) ?  trim( strval( $row['comment'] ) ) : null;
@@ -422,9 +432,13 @@ class Revision {
 
                        $this->mTitle     = null; # Load on demand if needed
                        $this->mCurrent   = false;
-                       # If we still have no len_size, see it we have the text to figure it out
+                       # If we still have no length, see if we have the text to figure it out
                        if ( !$this->mSize ) {
-                               $this->mSize      = is_null( $this->mText ) ? null : strlen( $this->mText );
+                               $this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText );
+                       }
+                       # Same for sha1
+                       if ( $this->mSha1 === null ) {
+                               $this->mSha1 = is_null( $this->mText ) ? null : self::base36Sha1( $this->mText );
                        }
                } else {
                        throw new MWException( 'Revision constructor passed invalid row format.' );
@@ -468,6 +482,15 @@ class Revision {
                return $this->mSize;
        }
 
+       /**
+        * Returns the base36 sha1 of the text in this revision, or null if unknown.
+        *
+        * For a revision built from a database row the stored rev_sha1 value is
+        * returned as-is, so this may also be an empty string (the column default)
+        * when the hash has not been populated yet.
+        *
+        * @return String|null
+        */
+       public function getSha1() {
+               return $this->mSha1;
+       }
+
        /**
         * Returns the title of the page associated with this entry.
         *
@@ -938,8 +961,12 @@ class Revision {
                                'rev_timestamp'  => $dbw->timestamp( $this->mTimestamp ),
                                'rev_deleted'    => $this->mDeleted,
                                'rev_len'        => $this->mSize,
-                               'rev_parent_id'  => is_null($this->mParentId) ?
-                                       $this->getPreviousRevisionId( $dbw ) : $this->mParentId
+                               'rev_parent_id'  => is_null( $this->mParentId )
+                                       ? $this->getPreviousRevisionId( $dbw )
+                                       : $this->mParentId,
+                               'rev_sha1'       => is_null( $this->mSha1 )
+                                       ? Revision::base36Sha1( $this->mText )
+                                       : $this->mSha1
                        ), __METHOD__
                );
 
@@ -951,6 +978,15 @@ class Revision {
                return $this->mId;
        }
 
+       /**
+        * Hash a string of text with SHA-1 and express the digest in base 36.
+        *
+        * @param $text String Text to hash
+        * @return String Base-36 representation of the SHA-1 digest
+        */
+       public static function base36Sha1( $text ) {
+               // sha1() yields the digest as a hexadecimal (base-16) string
+               $hexDigest = sha1( $text );
+               // Last argument is presumably the minimum output length; a 160-bit
+               // value needs at most 31 base-36 digits -- TODO confirm
+               return wfBaseConvert( $hexDigest, 16, 36, 31 );
+       }
+
        /**
         * Lazy-load the revision's text.
         * Currently hardcoded to the 'text' table storage engine.
index 3462427..4bc7873 100644 (file)
@@ -1667,7 +1667,8 @@ class WikiPage extends Page {
                                'ar_flags'      => '\'\'', // MySQL's "strict mode"...
                                'ar_len'        => 'rev_len',
                                'ar_page_id'    => 'page_id',
-                               'ar_deleted'    => $bitfield
+                               'ar_deleted'    => $bitfield,
+                               'ar_sha1'       => 'rev_sha1'
                        ), array(
                                'page_id' => $id,
                                'page_id = rev_page'
index e7ecb6f..80dbe30 100644 (file)
@@ -41,7 +41,9 @@ abstract class DatabaseUpdater {
 
        protected $postDatabaseUpdateMaintenance = array(
                'DeleteDefaultMessages',
-               'PopulateRevisionLength'
+               'PopulateRevisionLength',
+               'PopulateRevisionSha1',
+               'PopulateImageSha1'
        );
 
        /**
index cd6de36..30715fd 100644 (file)
@@ -187,7 +187,8 @@ class MysqlUpdater extends DatabaseUpdater {
                        array( 'addIndex', 'logging',       'type_action',      'patch-logging-type-action-index.sql'),
                        array( 'doMigrateUserOptions' ),
                        array( 'dropField', 'user',         'user_options', 'patch-drop-user_options.sql' ),
-
+                       array( 'addField', 'revision',      'rev_sha1',         'patch-rev_sha1.sql' ),
+                       array( 'addField', 'archive',       'ar_sha1',          'patch-ar_sha1.sql' )
                );
        }
 
index 04b6a31..19e12c5 100644 (file)
@@ -65,6 +65,8 @@ class SqliteUpdater extends DatabaseUpdater {
                        array( 'addIndex', 'logging',       'type_action',      'patch-logging-type-action-index.sql'),
                        array( 'doMigrateUserOptions' ),
                        array( 'dropField', 'user',         'user_options', 'patch-drop-user_options.sql' ),
+                       array( 'addField', 'revision',      'rev_sha1',         'patch-rev_sha1.sql' ),
+                       array( 'addField', 'archive',       'ar_sha1',          'patch-ar_sha1.sql' )
                );
        }
 
index 5ee965e..7342f3f 100644 (file)
@@ -116,7 +116,7 @@ class PageArchive {
                $res = $dbr->select( 'archive',
                        array(
                                'ar_minor_edit', 'ar_timestamp', 'ar_user', 'ar_user_text',
-                               'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id'
+                               'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id', 'ar_sha1'
                        ),
                        array( 'ar_namespace' => $this->title->getNamespace(),
                                   'ar_title' => $this->title->getDBkey() ),
@@ -460,7 +460,8 @@ class PageArchive {
                                'ar_text_id',
                                'ar_deleted',
                                'ar_page_id',
-                               'ar_len' ),
+                               'ar_len',
+                               'ar_sha1' ),
                        /* WHERE */ array(
                                'ar_namespace' => $this->title->getNamespace(),
                                'ar_title'     => $this->title->getDBkey(),
diff --git a/maintenance/archives/patch-ar_sha1.sql b/maintenance/archives/patch-ar_sha1.sql
new file mode 100644 (file)
index 0000000..1c7d8e9
--- /dev/null
@@ -0,0 +1,3 @@
+-- Add ar_sha1: base-36 SHA-1 of the text content; existing rows get '' until maintenance/populateRevisionSha1.php fills them in
+ALTER TABLE /*$wgDBprefix*/archive
+  ADD ar_sha1 varbinary(32) NOT NULL default '';
diff --git a/maintenance/archives/patch-rev_sha1.sql b/maintenance/archives/patch-rev_sha1.sql
new file mode 100644 (file)
index 0000000..0100c36
--- /dev/null
@@ -0,0 +1,3 @@
+-- Add rev_sha1: base-36 SHA-1 of the text content; existing rows get '' until maintenance/populateRevisionSha1.php fills them in
+ALTER TABLE /*$wgDBprefix*/revision
+  ADD rev_sha1 varbinary(32) NOT NULL default '';
diff --git a/maintenance/populateRevisionSha1.php b/maintenance/populateRevisionSha1.php
new file mode 100644 (file)
index 0000000..386a0a6
--- /dev/null
@@ -0,0 +1,108 @@
+<?php
+/**
+ * Fills the rev_sha1 and ar_sha1 columns of revision
+ * and archive tables for revisions created before MW 1.19.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+/**
+ * Maintenance script that back-fills the rev_sha1 and ar_sha1 columns for
+ * rows that still hold the empty-string default.
+ */
+class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
+               $this->setBatchSize( 200 );
+       }
+
+       /**
+        * @return String Key identifying this update; presumably used by the
+        *   parent class to record that it has already run (TODO confirm)
+        */
+       protected function getUpdateKey() {
+               return 'populate rev_sha1';
+       }
+
+       /**
+        * Populate rev_sha1 and then ar_sha1, and print a summary of how many
+        * rows were updated in each table.
+        *
+        * error() is called with its second argument set to true when a table
+        * is missing; presumably that makes the error fatal (TODO confirm).
+        *
+        * @return Boolean Always true when this method returns
+        */
+       protected function doDBUpdates() {
+               $db = $this->getDB( DB_MASTER );
+               if ( !$db->tableExists( 'revision' ) ) {
+                       $this->error( "revision table does not exist", true );
+               }
+               if ( !$db->tableExists( 'archive' ) ) {
+                       $this->error( "archive table does not exist", true );
+               }
+
+               $this->output( "Populating rev_sha1 column\n" );
+               $rc = $this->doSha1Updates( $db, 'revision', 'rev_id', 'rev' );
+
+               $this->output( "Populating ar_sha1 column\n" );
+               $ac = $this->doSha1Updates( $db, 'archive', 'ar_rev_id', 'ar' );
+
+               $this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
+               return true;
+       }
+
+       /**
+        * Compute and store the base-36 SHA-1 for every row of $table whose
+        * {$prefix}_sha1 column is still '', walking $idCol in batches of
+        * $this->mBatchSize.
+        *
+        * Rows whose $idCol is NULL are never matched by the batch condition,
+        * so they are left untouched by this method.
+        *
+        * @param $db Object Database connection, as obtained in doDBUpdates()
+        * @param $table String Table name ('revision' or 'archive')
+        * @param $idCol String Numeric ID column to iterate over
+        * @param $prefix String Column prefix ('rev' or 'ar')
+        * @return Integer Rows changed
+        */
+       protected function doSha1Updates( $db, $table, $idCol, $prefix ) {
+               $start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
+               $end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
+               if ( !$start || !$end ) {
+                       $this->output( "...$table table seems to be empty.\n" );
+                       # Nothing was changed: return a count rather than a boolean, so
+                       # the caller's summary line does not report "1" updated row.
+                       return 0;
+               }
+
+               $count = 0;
+               # Pad $end by (almost) one batch so that the loop below also covers
+               # the final, partial chunk
+               $end += $this->mBatchSize - 1;
+               $blockStart = $start;
+               $blockEnd = $start + $this->mBatchSize - 1;
+               while ( $blockEnd <= $end ) {
+                       $this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
+                       # Only pick up rows that still have the empty default hash
+                       $cond = "$idCol BETWEEN $blockStart AND $blockEnd
+                               AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
+                       $res = $db->select( $table, '*', $cond, __METHOD__ );
+
+                       # One transaction per batch
+                       $db->begin();
+                       foreach ( $res as $row ) {
+                               if ( $table === 'archive' ) {
+                                       $rev = Revision::newFromArchiveRow( $row );
+                               } else {
+                                       $rev = new Revision( $row );
+                               }
+                               $text = $rev->getRawText();
+                               if ( !is_string( $text ) ) {
+                                       # This should not happen, but sometimes does (bug 20757)
+                                       $this->output( "Text of revision {$row->$idCol} unavailable!\n" );
+                               } else {
+                                       $db->update( $table,
+                                               array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
+                                               array( $idCol => $row->$idCol ),
+                                               __METHOD__ );
+                                       $count++;
+                               }
+                       }
+                       $db->commit();
+
+                       $blockStart += $this->mBatchSize;
+                       $blockEnd += $this->mBatchSize;
+                       wfWaitForSlaves();
+               }
+               return $count;
+       }
+}
+
+$maintClass = "PopulateRevisionSha1";
+require_once( RUN_MAINTENANCE_IF_MAIN );
index 4b469b4..f42b9a6 100644 (file)
@@ -311,7 +311,10 @@ CREATE TABLE /*_*/revision (
 
   -- Key to revision.rev_id
   -- This field is used to add support for a tree structure (The Adjacency List Model)
-  rev_parent_id int unsigned default NULL
+  rev_parent_id int unsigned default NULL,
+
+  -- SHA-1 text content hash in base-36
+  rev_sha1 varbinary(32) NOT NULL default ''
 
 ) /*$wgDBTableOptions*/ MAX_ROWS=10000000 AVG_ROW_LENGTH=1024;
 -- In case tables are created as MyISAM, use row hints for MySQL <5.0 to avoid 4GB limit
@@ -418,7 +421,10 @@ CREATE TABLE /*_*/archive (
   ar_page_id int unsigned,
 
   -- Original previous revision
-  ar_parent_id int unsigned default NULL
+  ar_parent_id int unsigned default NULL,
+
+  -- SHA-1 text content hash in base-36
+  ar_sha1 varbinary(32) NOT NULL default ''
 ) /*$wgDBTableOptions*/;
 
 CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp);