Add script to fix content model of JSON pages
authorKunal Mehta <legoktm@gmail.com>
Wed, 21 Oct 2015 05:44:33 +0000 (22:44 -0700)
committerKunal Mehta <legoktm@gmail.com>
Tue, 24 Nov 2015 18:08:44 +0000 (10:08 -0800)
MediaWiki:Foo.json and User:Foo/bar.json pages now have a default
content model of JSON, but existing pages with those names were created
under the previous default of wikitext.

This script sets the content model of each such page to "json", unless the
page has invalid JSON syntax, in which case its revisions are set to "wikitext".

For convenience, the script is automatically run as part of update.php.

Bug: T108663
Change-Id: I1412937ccea8e65dba58580beec79cbf2286ae01

autoload.php
includes/installer/DatabaseUpdater.php
maintenance/fixDefaultJsonContentPages.php [new file with mode: 0644]

index d7bccb3..80d1eeb 100644 (file)
@@ -447,6 +447,7 @@ $wgAutoloadLocalClasses = array(
        'FindHooks' => __DIR__ . '/maintenance/findHooks.php',
        'FindMissingFiles' => __DIR__ . '/maintenance/findMissingFiles.php',
        'FixBug20757' => __DIR__ . '/maintenance/storage/fixBug20757.php',
+       'FixDefaultJsonContentPages' => __DIR__ . '/maintenance/fixDefaultJsonContentPages.php',
        'FixDoubleRedirects' => __DIR__ . '/maintenance/fixDoubleRedirects.php',
        'FixExtLinksProtocolRelative' => __DIR__ . '/maintenance/fixExtLinksProtocolRelative.php',
        'FixTimestamps' => __DIR__ . '/maintenance/fixTimestamps.php',
index 57084cb..904fde8 100644 (file)
@@ -73,7 +73,8 @@ abstract class DatabaseUpdater {
                'PopulateImageSha1',
                'FixExtLinksProtocolRelative',
                'PopulateFilearchiveSha1',
-               'PopulateBacklinkNamespace'
+               'PopulateBacklinkNamespace',
+               'FixDefaultJsonContentPages'
        );
 
        /**
diff --git a/maintenance/fixDefaultJsonContentPages.php b/maintenance/fixDefaultJsonContentPages.php
new file mode 100644 (file)
index 0000000..1265891
--- /dev/null
@@ -0,0 +1,128 @@
+<?php
+/**
+ * Fix instances of pre-existing JSON pages
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that fixes the stored content model of pre-existing
+ * pages named like MediaWiki:Foo.json or User:Foo/bar.json.
+ *
+ * Usage:
+ *  fixDefaultJsonContentPages.php
+ *
+ * It is automatically run by update.php
+ */
+class FixDefaultJsonContentPages extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription =
+                               'Fix instances of JSON pages prior to them being the ContentHandler default';
+               $this->setBatchSize( 100 );
+       }
+
+       /**
+        * @return string Key identifying this update (the class name)
+        */
+       protected function getUpdateKey() {
+               return __CLASS__;
+       }
+
+       /**
+        * Walk every matching page in the MediaWiki and User namespaces, in
+        * page_id order and in batches of mBatchSize, passing each row to
+        * handleRow().
+        *
+        * Does nothing when $wgContentHandlerUseDB is disabled.
+        *
+        * @return bool Always true
+        */
+       protected function doDBUpdates() {
+               if ( !$this->getConfig()->get( 'ContentHandlerUseDB' ) ) {
+                       $this->output( "\$wgContentHandlerUseDB is not enabled, nothing to do.\n" );
+                       return true;
+               }
+
+               $dbr = wfGetDB( DB_SLAVE );
+               // Title patterns per namespace: "<anything>.json" for MediaWiki pages,
+               // "<anything>/<anything>.json" (i.e. subpages only) for User pages.
+               $namespaces = array(
+                       NS_MEDIAWIKI => $dbr->buildLike( $dbr->anyString(), '.json' ),
+                       NS_USER => $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString(), '.json' ),
+               );
+               foreach ( $namespaces as $ns => $like ) {
+                       // Highest page_id handled so far in this namespace; the next
+                       // batch starts strictly after it.
+                       $lastPage = 0;
+                       do {
+                               $rows = $dbr->select(
+                                               'page',
+                                               array( 'page_id', 'page_title', 'page_namespace', 'page_content_model' ),
+                                               array(
+                                                               'page_namespace' => $ns,
+                                                               'page_title ' . $like,
+                                                               'page_id > ' . $dbr->addQuotes( $lastPage )
+                                               ),
+                                               __METHOD__,
+                                               array( 'ORDER BY' => 'page_id', 'LIMIT' => $this->mBatchSize )
+                               );
+                               foreach ( $rows as $row ) {
+                                       $this->handleRow( $row );
+                                       // Advance the cursor; without this a full batch would be
+                                       // re-selected forever.
+                                       $lastPage = $row->page_id;
+                               }
+                       } while ( $rows->numRows() >= $this->mBatchSize );
+               }
+
+               return true;
+       }
+
+       /**
+        * Fix the content model for a single page.
+        *
+        * If the latest revision holds valid JSON, page_content_model is set to
+        * "json". If it is JsonContent but invalid, rev_content_model of all the
+        * page's revisions is set to "wikitext". Anything else is skipped.
+        *
+        * @param stdClass $row Row from the page table; page_id, page_namespace
+        *  and page_title are read
+        */
+       protected function handleRow( stdClass $row ) {
+               $title = Title::makeTitle( $row->page_namespace, $row->page_title );
+               $this->output( "Processing {$title} ({$row->page_id})...\n" );
+               $rev = Revision::newFromTitle( $title );
+               if ( !$rev ) {
+                       // The row came from a replica read; the page may have been
+                       // deleted since, or have no loadable latest revision.
+                       $this->output( "no revision found? Skipping\n" );
+                       return;
+               }
+               $content = $rev->getContent( Revision::RAW );
+               if ( !( $content instanceof JsonContent ) ) {
+                       $this->output( "not a JSON page? Skipping\n" );
+                       return;
+               }
+
+               $dbw = wfGetDB( DB_MASTER );
+               if ( $content->isValid() ) {
+                       // Yay, actually JSON. We need to just change the
+                       // page_content_model because revision will automatically
+                       // use the default, which is *now* JSON.
+                       $this->output( "Setting page_content_model to json..." );
+                       $dbw->update(
+                               'page',
+                               array( 'page_content_model' => CONTENT_MODEL_JSON ),
+                               array( 'page_id' => $row->page_id ),
+                               __METHOD__
+                       );
+                       $this->output( "done.\n" );
+                       wfWaitForSlaves();
+                       return;
+               }
+
+               // Not JSON...force it to wikitext. We need to update the
+               // revision table so that these revisions are always processed
+               // as wikitext in the future. page_content_model is already
+               // set to "wikitext".
+               $this->output( "Setting rev_content_model to wikitext..." );
+               // Grab all the ids for batching
+               $ids = $dbw->selectFieldValues(
+                       'revision',
+                       'rev_id',
+                       array( 'rev_page' => $row->page_id ),
+                       __METHOD__
+               );
+               foreach ( array_chunk( $ids, 50 ) as $chunk ) {
+                       $dbw->update(
+                               'revision',
+                               array( 'rev_content_model' => CONTENT_MODEL_WIKITEXT ),
+                               array( 'rev_page' => $row->page_id, 'rev_id' => $chunk ),
+                               __METHOD__
+                       );
+                       wfWaitForSlaves();
+               }
+               $this->output( "done.\n" );
+       }
+}
+
+$maintClass = 'FixDefaultJsonContentPages';
+require_once RUN_MAINTENANCE_IF_MAIN;