Make populateRevisionLength fix rows with ar_len = 0.
author daniel <daniel.kinzler@wikimedia.de>
Mon, 23 Apr 2018 09:07:24 +0000 (11:07 +0200)
committer Legoktm <legoktm@member.fsf.org>
Tue, 1 May 2018 16:12:29 +0000 (16:12 +0000)
Due to T192189, some rows in the archive table may have ar_len set to 0
erroneously. Such rows can be detected by checking whether their ar_sha1 differs
from "phoiac9h4m842xq45sp7s6u21eteeq1", which is the hash of the empty string:
a row with ar_len = 0 but a non-empty-content hash has a wrong length.

This patch makes populateRevisionLength.php detect and fix such rows.

Bug: T192189
Change-Id: I9b6cd62f4b8c5bf93cd305b56af4352a527e19c9
(cherry picked from commit e420790f5200568422dce9473ef44d4f7cefe675)

maintenance/populateRevisionLength.php

index 8895c9f..ffa4ff7 100644 (file)
@@ -21,6 +21,8 @@
  * @ingroup Maintenance
  */
 
+use Wikimedia\Rdbms\IDatabase;
+
 require_once __DIR__ . '/Maintenance.php';
 
 /**
@@ -97,7 +99,13 @@ class PopulateRevisionLength extends LoggedUpdateMaintenance {
                                [
                                        "$idCol >= $blockStart",
                                        "$idCol <= $blockEnd",
-                                       "{$prefix}_len IS NULL"
+                                       $dbr->makeList( [
+                                               "{$prefix}_len IS NULL",
+                                               $dbr->makeList( [
+                                                       "{$prefix}_len = 0",
+                                                       "{$prefix}_sha1 != \"phoiac9h4m842xq45sp7s6u21eteeq1\"", // sha1( "" )
+                                               ], IDatabase::LIST_AND )
+                                       ], IDatabase::LIST_OR )
                                ],
                                __METHOD__,
                                [],
@@ -136,7 +144,7 @@ class PopulateRevisionLength extends LoggedUpdateMaintenance {
                        ? Revision::newFromArchiveRow( $row )
                        : new Revision( $row );
 
-               $content = $rev->getContent();
+               $content = $rev->getContent( Revision::RAW );
                if ( !$content ) {
                        # This should not happen, but sometimes does (T22757)
                        $id = $row->$idCol;