Avoid DB rows with usable names but ID = 0 by introducing "interwiki" usernames
authorBrad Jorsch <bjorsch@wikimedia.org>
Wed, 25 Oct 2017 19:26:53 +0000 (15:26 -0400)
committerTim Starling <tstarling@wikimedia.org>
Thu, 30 Nov 2017 02:12:44 +0000 (13:12 +1100)
Importing revisions in MediaWiki has long been weird: if the username on
the imported revision exists locally it's automatically attributed to
the local user, while if the name does not exist locally we wind up with
revision table rows with rev_user = 0 and rev_user_text being a valid
name that someone might later create. "Global" blocks too create rows
with ipb_by = 0 an ipb_by_text being a valid name.

The upcoming actor table change, as things currently stand, would
regularize that a bit by automatically attributing those imported
revisions to the newly-created user. But that's not necessarily what we
actually want to happen. And it would certainly confuse CentralAuth's
attempt to detect its own global blocks.

Thus, this patch introduces "interwiki" usernames that aren't valid for
local use, of the format "iw>Example".[1] Linker will interpret these
names and generate an appropriate interwiki link in history pages and
the like, as if from wikitext like `[[iw:User:Example]]`.

Imports for non-existant local users (and optionally for existing local
users too) will credit the edit to such an interwiki name. There is also
a new hook, 'ImportHandleUnknownUser', to allow extension such as
CentralAuth to create local users as their edits are imported.

Block will no longer accept usable-but-nonexistent names for 'byText' or
->setBlocker(). CentralAuth's global blocks will be submitted with an
interwiki username (see Ieae5d24f9).

Wikis that have imported edits or CentralAuth global blocks should run
the new maintenance/cleanupUsersWithNoId.php maintenance script. This
isn't done by update.php because (1) it needs an interwiki prefix to use
and (2) the updater can't know whether to pass the `--assign` flag.

[1]: '>' was used instead of the more usual ':' because WMF wikis have
many existing usernames containing colons.

Bug: T9240
Bug: T20209
Bug: T111605
Change-Id: I5401941c06102e8faa813910519d55482dff36cb
Depends-On: Ieae5d24f9098c1977447c50a8d4e2cab58a24d9f

17 files changed:
RELEASE-NOTES-1.31
autoload.php
docs/hooks.txt
includes/Block.php
includes/DefaultSettings.php
includes/Linker.php
includes/api/ApiImport.php
includes/api/i18n/en.json
includes/api/i18n/qqq.json
includes/import/WikiImporter.php
includes/specials/SpecialImport.php
languages/i18n/en.json
languages/i18n/qqq.json
maintenance/cleanupUsersWithNoId.php [new file with mode: 0644]
maintenance/importDump.php
tests/phpunit/includes/BlockTest.php
tests/phpunit/includes/import/ImportTest.php

index c24d761..b32e3e7 100644 (file)
@@ -15,6 +15,8 @@ production.
   possible for fallback images such as png.
 * (T44246) $wgFilterLogTypes will no longer ignore 'patrol' when user does
   not have the right to mark things patrolled.
+* Wikis that contain imported revisions or CentralAuth global blocks should run
+  maintenance/cleanupUsersWithNoId.php.
 
 === New features in 1.31 ===
 * Wikimedia\Rdbms\IDatabase->select() and similar methods now support
@@ -22,6 +24,13 @@ production.
 * As a first pass in standardizing dialog boxes across the MediaWiki product,
   Html class now provides helper methods for messageBox, successBox, errorBox and
   warningBox generation.
+* (T9240) Imports will now record unknown (and, optionally, known) usernames in
+  a format like "iw>Example".
+* (T20209) Linker (used on history pages, log pages, and so on) will display
+  usernames formed like "iw>Example" as interwiki links, as if by wikitext like
+  [[iw:User:Example|iw>Example]].
+* (T111605) The 'ImportHandleUnknownUser' hook allows extensions to auto-create
+  users during an import.
 
 === External library changes in 1.31 ===
 
@@ -107,7 +116,9 @@ changes to languages because of Phabricator reports.
 * Passing a ParserOptions object to OutputPage::parserOptions() is deprecated.
 * Browser support for Opera 12 and older was removed.
   Opera 15+ continues at Grade A support.
-* …
+* The Block class will no longer accept usable-but-missing usernames for
+  'byText' or ->setBlocker(). Callers should either ensure the blocker exists
+  locally or use a new interwiki-format username like "iw>Example".
 
 == Compatibility ==
 MediaWiki 1.31 requires PHP 5.5.9 or later. There is experimental support for
index 51daced..2661fd7 100644 (file)
@@ -264,6 +264,7 @@ $wgAutoloadLocalClasses = [
        'CleanupPreferences' => __DIR__ . '/maintenance/cleanupPreferences.php',
        'CleanupRemovedModules' => __DIR__ . '/maintenance/cleanupRemovedModules.php',
        'CleanupSpam' => __DIR__ . '/maintenance/cleanupSpam.php',
+       'CleanupUsersWithNoId' => __DIR__ . '/maintenance/cleanupUsersWithNoId.php',
        'ClearInterwikiCache' => __DIR__ . '/maintenance/clearInterwikiCache.php',
        'ClearUserWatchlistJob' => __DIR__ . '/includes/jobqueue/jobs/ClearUserWatchlistJob.php',
        'CliInstaller' => __DIR__ . '/includes/installer/CliInstaller.php',
index 6c1597f..685a182 100644 (file)
@@ -1840,6 +1840,11 @@ $revisionInfo: Array of revision information
 Return false to stop further processing of the tag
 $reader: XMLReader object
 
+'ImportHandleUnknownUser': When a user does exist locally, this hook is called
+to give extensions an opportunity to auto-create it. If the auto-creation is
+successful, return false.
+$name: User name
+
 'ImportHandleUploadXMLTag': When parsing a XML tag in a file upload.
 Return false to stop further processing of the tag
 $reader: XMLReader object
index d1e78bb..0999ad2 100644 (file)
@@ -1479,9 +1479,19 @@ class Block {
 
        /**
         * Set the user who implemented (or will implement) this block
-        * @param User|string $user Local User object or username string for foreign users
+        * @param User|string $user Local User object or username string
         */
        public function setBlocker( $user ) {
+               if ( is_string( $user ) ) {
+                       $user = User::newFromName( $user, false );
+               }
+
+               if ( $user->isAnon() && User::isUsableName( $user->getName() ) ) {
+                       throw new InvalidArgumentException(
+                               'Blocker must be a local user or a name that cannot be a local user'
+                       );
+               }
+
                $this->blocker = $user;
        }
 
index dcbcb6e..7448cfc 100644 (file)
@@ -4852,6 +4852,7 @@ $wgReservedUsernames = [
        'msg:double-redirect-fixer', // Automatic double redirect fix
        'msg:usermessage-editor', // Default user for leaving user messages
        'msg:proxyblocker', // For $wgProxyList and Special:Blockme (removed in 1.22)
+       'msg:sorbs', // For $wgEnableDnsBlacklist etc.
        'msg:spambot_username', // Used by cleanupSpam.php
        'msg:autochange-username', // Used by anon category RC entries (parser functions, Lua & purges)
 ];
index 403b10a..a0332cf 100644 (file)
@@ -892,10 +892,26 @@ class Linker {
         */
        public static function userLink( $userId, $userName, $altUserName = false ) {
                $classes = 'mw-userlink';
+               $page = null;
                if ( $userId == 0 ) {
-                       $page = SpecialPage::getTitleFor( 'Contributions', $userName );
-                       if ( $altUserName === false ) {
-                               $altUserName = IP::prettifyIP( $userName );
+                       $pos = strpos( $userName, '>' );
+                       if ( $pos !== false ) {
+                               $iw = explode( ':', substr( $userName, 0, $pos ) );
+                               $firstIw = array_shift( $iw );
+                               $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
+                               if ( $interwikiLookup->isValidInterwiki( $firstIw ) ) {
+                                       $title = MWNamespace::getCanonicalName( NS_USER ) . ':' . substr( $userName, $pos + 1 );
+                                       if ( $iw ) {
+                                               $title = join( ':', $iw ) . ':' . $title;
+                                       }
+                                       $page = Title::makeTitle( NS_MAIN, $title, '', $firstIw );
+                               }
+                               $classes .= ' mw-extuserlink';
+                       } else {
+                               $page = SpecialPage::getTitleFor( 'Contributions', $userName );
+                               if ( $altUserName === false ) {
+                                       $altUserName = IP::prettifyIP( $userName );
+                               }
                        }
                        $classes .= ' mw-anonuserlink'; // Separate link class for anons (T45179)
                } else {
@@ -903,11 +919,12 @@ class Linker {
                }
 
                // Wrap the output with <bdi> tags for directionality isolation
-               return self::link(
-                       $page,
-                       '<bdi>' . htmlspecialchars( $altUserName !== false ? $altUserName : $userName ) . '</bdi>',
-                       [ 'class' => $classes ]
-               );
+               $linkText =
+                       '<bdi>' . htmlspecialchars( $altUserName !== false ? $altUserName : $userName ) . '</bdi>';
+
+               return $page
+                       ? self::link( $page, $linkText, [ 'class' => $classes ] )
+                       : Html::rawElement( 'span', [ 'class' => $classes ], $linkText );
        }
 
        /**
@@ -931,6 +948,11 @@ class Linker {
                $blockable = !( $flags & self::TOOL_LINKS_NOBLOCK );
                $addEmailLink = $flags & self::TOOL_LINKS_EMAIL && $userId;
 
+               if ( $userId == 0 && strpos( $userText, '>' ) !== false ) {
+                       // No tools for an external user
+                       return '';
+               }
+
                $items = [];
                if ( $talkable ) {
                        $items[] = self::userTalkLink( $userId, $userText );
index b46f0b1..a0f0a8d 100644 (file)
@@ -53,12 +53,18 @@ class ApiImport extends ApiBase {
                                $params['fullhistory'],
                                $params['templates']
                        );
+                       $usernamePrefix = $params['interwikisource'];
                } else {
                        $isUpload = true;
                        if ( !$user->isAllowed( 'importupload' ) ) {
                                $this->dieWithError( 'apierror-cantimport-upload' );
                        }
                        $source = ImportStreamSource::newFromUpload( 'xml' );
+                       $usernamePrefix = (string)$params['interwikiprefix'];
+                       if ( $usernamePrefix === '' ) {
+                               $encParamName = $this->encodeParamName( 'interwikiprefix' );
+                               $this->dieWithError( [ 'apierror-missingparam', $encParamName ] );
+                       }
                }
                if ( !$source->isOK() ) {
                        $this->dieStatus( $source );
@@ -81,6 +87,7 @@ class ApiImport extends ApiBase {
                                $this->dieStatus( $statusRootPage );
                        }
                }
+               $importer->setUsernamePrefix( $usernamePrefix, $params['assignknownusers'] );
                $reporter = new ApiImportReporter(
                        $importer,
                        $isUpload,
@@ -141,6 +148,9 @@ class ApiImport extends ApiBase {
                        'xml' => [
                                ApiBase::PARAM_TYPE => 'upload',
                        ],
+                       'interwikiprefix' => [
+                               ApiBase::PARAM_TYPE => 'string',
+                       ],
                        'interwikisource' => [
                                ApiBase::PARAM_TYPE => $this->getAllowedImportSources(),
                        ],
@@ -150,6 +160,7 @@ class ApiImport extends ApiBase {
                        'namespace' => [
                                ApiBase::PARAM_TYPE => 'namespace'
                        ],
+                       'assignknownusers' => false,
                        'rootpage' => null,
                        'tags' => [
                                ApiBase::PARAM_TYPE => 'tags',
index 85f17de..91c3e18 100644 (file)
        "apihelp-import-extended-description": "Note that the HTTP POST must be done as a file upload (i.e. using multipart/form-data) when sending a file for the <var>xml</var> parameter.",
        "apihelp-import-param-summary": "Log entry import summary.",
        "apihelp-import-param-xml": "Uploaded XML file.",
+       "apihelp-import-param-interwikiprefix": "For uploaded imports: interwiki prefix to apply to unknown user names (and known users if <var>$1assignknownusers</var> is set).",
+       "apihelp-import-param-assignknownusers": "Assign edits to local users where the named user exists locally.",
        "apihelp-import-param-interwikisource": "For interwiki imports: wiki to import from.",
        "apihelp-import-param-interwikipage": "For interwiki imports: page to import.",
        "apihelp-import-param-fullhistory": "For interwiki imports: import the full history, not just the current version.",
index 3bdf7c6..47afdc1 100644 (file)
        "apihelp-import-extended-description": "{{doc-apihelp-extended-description|import}}",
        "apihelp-import-param-summary": "{{doc-apihelp-param|import|summary|info=The parameter being documented here provides the summary used on the log messages about the import. The phrase \"Import summary\" here is grammatically equivalent to a phrase such as \"science book\", not \"eat food\".}}",
        "apihelp-import-param-xml": "{{doc-apihelp-param|import|xml}}",
+       "apihelp-import-param-interwikiprefix": "{{doc-apihelp-param|import|interwikiprefix}}",
+       "apihelp-import-param-assignknownusers": "{{doc-apihelp-param|import|assignknownusers}}",
        "apihelp-import-param-interwikisource": "{{doc-apihelp-param|import|interwikisource}}",
        "apihelp-import-param-interwikipage": "{{doc-apihelp-param|import|interwikipage}}",
        "apihelp-import-param-fullhistory": "{{doc-apihelp-param|import|fullhistory}}",
index a1f7e0c..bffc1a9 100644 (file)
@@ -47,6 +47,9 @@ class WikiImporter {
        private $countableCache = [];
        /** @var bool */
        private $disableStatisticsUpdate = false;
+       private $usernamePrefix = 'imported';
+       private $assignKnownUsers = false;
+       private $triedCreations = [];
 
        /**
         * Creates an ImportXMLReader drawing from the source provided
@@ -311,6 +314,16 @@ class WikiImporter {
                $this->mImportUploads = $import;
        }
 
+       /**
+        * @since 1.31
+        * @param string $usernamePrefix Prefix to apply to unknown (and possibly also known) usernames
+        * @param bool $assignKnownUsers Whether to apply the prefix to usernames that exist locally
+        */
+       public function setUsernamePrefix( $usernamePrefix, $assignKnownUsers ) {
+               $this->usernamePrefix = rtrim( (string)$usernamePrefix, ':>' );
+               $this->assignKnownUsers = (bool)$assignKnownUsers;
+       }
+
        /**
         * Statistics update can cause a lot of time
         * @since 1.29
@@ -716,9 +729,9 @@ class WikiImporter {
                }
 
                if ( !isset( $logInfo['contributor']['username'] ) ) {
-                       $revision->setUsername( 'Unknown user' );
+                       $revision->setUsername( $this->usernamePrefix . '>Unknown user' );
                } else {
-                       $revision->setUsername( $logInfo['contributor']['username'] );
+                       $revision->setUsername( $this->prefixUsername( $logInfo['contributor']['username'] ) );
                }
 
                return $this->logItemCallback( $revision );
@@ -911,9 +924,9 @@ class WikiImporter {
                if ( isset( $revisionInfo['contributor']['ip'] ) ) {
                        $revision->setUserIP( $revisionInfo['contributor']['ip'] );
                } elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
-                       $revision->setUsername( $revisionInfo['contributor']['username'] );
+                       $revision->setUsername( $this->prefixUsername( $revisionInfo['contributor']['username'] ) );
                } else {
-                       $revision->setUsername( 'Unknown user' );
+                       $revision->setUsername( $this->usernamePrefix . '>Unknown user' );
                }
                if ( isset( $revisionInfo['sha1'] ) ) {
                        $revision->setSha1Base36( $revisionInfo['sha1'] );
@@ -1020,13 +1033,43 @@ class WikiImporter {
                        $revision->setUserIP( $uploadInfo['contributor']['ip'] );
                }
                if ( isset( $uploadInfo['contributor']['username'] ) ) {
-                       $revision->setUsername( $uploadInfo['contributor']['username'] );
+                       $revision->setUsername( $this->prefixUsername( $uploadInfo['contributor']['username'] ) );
                }
                $revision->setNoUpdates( $this->mNoUpdates );
 
                return call_user_func( $this->mUploadCallback, $revision );
        }
 
+       /**
+        * Add an interwiki prefix to the username, if appropriate
+        * @since 1.31
+        * @param string $name Name being imported
+        * @return string Name, possibly with the prefix prepended.
+        */
+       protected function prefixUsername( $name ) {
+               if ( !User::isUsableName( $name ) ) {
+                       return $name;
+               }
+
+               if ( $this->assignKnownUsers ) {
+                       if ( User::idFromName( $name ) ) {
+                               return $name;
+                       }
+
+                       // See if any extension wants to create it.
+                       if ( !isset( $this->triedCreations[$name] ) ) {
+                               $this->triedCreations[$name] = true;
+                               if ( !Hooks::run( 'ImportHandleUnknownUser', [ $name ] ) &&
+                                       User::idFromName( $name, User::READ_LATEST )
+                               ) {
+                                       return $name;
+                               }
+                       }
+               }
+
+               return substr( $this->usernamePrefix . '>' . $name, 0, 255 );
+       }
+
        /**
         * @return array
         */
index 9ce52ef..ab5d4d7 100644 (file)
@@ -43,6 +43,8 @@ class SpecialImport extends SpecialPage {
        private $includeTemplates = false;
        private $pageLinkDepth;
        private $importSources;
+       private $assignKnownUsers;
+       private $usernamePrefix;
 
        public function __construct() {
                parent::__construct( 'Import', 'import' );
@@ -110,6 +112,7 @@ class SpecialImport extends SpecialPage {
                $isUpload = false;
                $request = $this->getRequest();
                $this->sourceName = $request->getVal( "source" );
+               $this->assignKnownUsers = $request->getCheck( 'assignKnownUsers' );
 
                $this->logcomment = $request->getText( 'log-comment' );
                $this->pageLinkDepth = $this->getConfig()->get( 'ExportMaxLinkDepth' ) == 0
@@ -130,6 +133,7 @@ class SpecialImport extends SpecialPage {
                        $source = Status::newFatal( 'import-token-mismatch' );
                } elseif ( $this->sourceName === 'upload' ) {
                        $isUpload = true;
+                       $this->usernamePrefix = $this->fullInterwikiPrefix = $request->getVal( 'usernamePrefix' );
                        if ( $user->isAllowed( 'importupload' ) ) {
                                $source = ImportStreamSource::newFromUpload( "xmlimport" );
                        } else {
@@ -169,6 +173,10 @@ class SpecialImport extends SpecialPage {
                        $source = Status::newFatal( "importunknownsource" );
                }
 
+               if ( (string)$this->fullInterwikiPrefix === '' ) {
+                       $source->fatal( 'importnoprefix' );
+               }
+
                $out = $this->getOutput();
                if ( !$source->isGood() ) {
                        $out->addWikiText( "<p class=\"error\">\n" .
@@ -192,6 +200,7 @@ class SpecialImport extends SpecialPage {
                                        return;
                                }
                        }
+                       $importer->setUsernamePrefix( $this->fullInterwikiPrefix, $this->assignKnownUsers );
 
                        $out->addWikiMsg( "importstart" );
 
@@ -336,6 +345,28 @@ class SpecialImport extends SpecialPage {
                                        Html::input( 'xmlimport', '', 'file', [ 'id' => 'xmlimport' ] ) . ' ' .
                                        "</td>
                                </tr>
+                               <tr>
+                                       <td class='mw-label'>" .
+                                       Xml::label( $this->msg( 'import-upload-username-prefix' )->text(),
+                                               'mw-import-usernamePrefix' ) .
+                                       "</td>
+                                       <td class='mw-input'>" .
+                                       Xml::input( 'usernamePrefix', 50,
+                                               $this->usernamePrefix,
+                                               [ 'id' => 'usernamePrefix', 'type' => 'text' ] ) . ' ' .
+                                       "</td>
+                               </tr>
+                               <tr>
+                                       <td></td>
+                                       <td class='mw-input'>" .
+                                       Xml::checkLabel(
+                                               $this->msg( 'import-assign-known-users' )->text(),
+                                               'assignKnownUsers',
+                                               'assignKnownUsers',
+                                               $this->assignKnownUsers
+                                       ) .
+                                       "</td>
+                               </tr>
                                <tr>
                                        <td class='mw-label'>" .
                                        Xml::label( $this->msg( 'import-comment' )->text(), 'mw-import-comment' ) .
@@ -489,6 +520,17 @@ class SpecialImport extends SpecialPage {
                                        ) .
                                        "</td>
                                </tr>
+                               <tr>
+                                       <td></td>
+                                       <td class='mw-input'>" .
+                                       Xml::checkLabel(
+                                               $this->msg( 'import-assign-known-users' )->text(),
+                                               'assignKnownUsers',
+                                               'assignKnownUsers',
+                                               $this->assignKnownUsers
+                                       ) .
+                                       "</td>
+                               </tr>
                                $importDepth
                                <tr>
                                        <td class='mw-label'>" .
index 1fecca0..764d254 100644 (file)
        "import-mapping-namespace": "Import to a namespace:",
        "import-mapping-subpage": "Import as subpages of the following page:",
        "import-upload-filename": "Filename:",
+       "import-upload-username-prefix": "Interwiki prefix:",
+       "import-assign-known-users": "Assign edits to local users where the named user exists locally",
        "import-comment": "Comment:",
        "importtext": "Please export the file from the source wiki using the [[Special:Export|export utility]].\nSave it to your computer and upload it here.",
        "importstart": "Importing pages...",
        "imported-log-entries": "Imported $1 {{PLURAL:$1|log entry|log entries}}.",
        "importfailed": "Import failed: <nowiki>$1</nowiki>",
        "importunknownsource": "Unknown import source type",
+       "importnoprefix": "No interwiki prefix was supplied",
        "importcantopen": "Could not open import file",
        "importbadinterwiki": "Bad interwiki link",
        "importsuccess": "Import finished!",
index 83c8a93..d8eb61b 100644 (file)
        "import-mapping-namespace": "Used as label for the second of three radio buttons in Import form on [[Special:Import]]. The radio button is followed by a drop-down list from which the user can select a namespace.\n\nSee also:\n* {{msg-mw|Import-mapping-default}}\n* {{msg-mw|Import-mapping-subpage}}",
        "import-mapping-subpage": "Used as label for the third of three radio buttons in Import form on [[Special:Import]]. The radio button is followed by a text box in which the user can type a page name. The imported pages will be created as subpages of the entered page name.\n\nSee also:\n* {{msg-mw|Import-mapping-default}}\n* {{msg-mw|Import-mapping-namespace}}",
        "import-upload-filename": "Used on [[Special:Import]] as label for upload of an XML file containing the pages to import.\n{{Identical|Filename}}",
+       "import-upload-username-prefix": "Used as label for input box in [[Special:Import]].",
+       "import-assign-known-users": "Use as label for checkbox in [[Special:Import]].",
        "import-comment": "Used as label for input box in [[Special:Import]].\n\nSee also:\n* {{msg-mw|Import-interwiki-history}}\n* {{msg-mw|Import-interwiki-templates}}\n* {{msg-mw|Import-interwiki-namespace}}\n* {{msg-mw|Import-interwiki-rootpage}}\n* {{msg-mw|Import-interwiki-submit}}\n{{Identical|Comment}}",
        "importtext": "Used in the Import form on [[Special:Import]].",
        "importstart": "Used in [[Special:Import]].\n\nSee also:\n* {{msg-mw|Importsuccess}}\n* {{msg-mw|Importfailed}}",
        "imported-log-entries": "Used as success message. Parameters:\n* $1 - number of log items\nSee also:\n* {{msg-mw|Importnopages}} - fatal error message",
        "importfailed": "Used as error message in [[Special:Import]]. Parameters:\n* $1 - import source\nSee also:\n* {{msg-mw|Importstart}}\n* {{msg-mw|Importsuccess}}",
        "importunknownsource": "Used as error message in [[Special:Import]].\n\nSee also:\n* {{msg-mw|import-token-mismatch}}\n* {{msg-mw|import-invalid-interwiki}}\n* {{msg-mw|Importunknownsource}}",
+       "importnoprefix": "Used as error message in [[Special:Import]]. Usually this error means that import via upload was attempted and the {{msg-mw|import-upload-username-prefix}} field was left empty.",
        "importcantopen": "Used as error message when importing from file or from URL.",
        "importbadinterwiki": "Used as error message when importing from interwiki.\n\nSee also:\n* {{msg-mw|Import-noarticle}}\n* {{msg-mw|Importbadinterwiki}}",
        "importsuccess": "Used in [[Special:Import]].\n\nSee also:\n* {{msg-mw|Importstart}}\n* {{msg-mw|Importfailed}}",
diff --git a/maintenance/cleanupUsersWithNoId.php b/maintenance/cleanupUsersWithNoId.php
new file mode 100644 (file)
index 0000000..74167d1
--- /dev/null
@@ -0,0 +1,212 @@
+<?php
+/**
+ * Cleanup tables that have valid usernames with no user ID
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+use Wikimedia\Rdbms\IDatabase;
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that cleans up tables that have valid usernames with no
+ * user ID.
+ *
+ * @ingroup Maintenance
+ * @since 1.31
+ */
+class CleanupUsersWithNoId extends LoggedUpdateMaintenance {
+       private $prefix, $table, $assign;
+       private $triedCreations = [];
+
+       public function __construct() {
+               parent::__construct();
+               $this->addDescription( 'Cleans up tables that have valid usernames with no user ID' );
+               $this->addOption( 'prefix', 'Interwiki prefix to apply to the usernames', true, true, 'p' );
+               $this->addOption( 'table', 'Only clean up this table', false, true );
+               $this->addOption( 'assign', 'Assign edits to existing local users if they exist', false, false );
+               $this->setBatchSize( 100 );
+       }
+
+       protected function getUpdateKey() {
+               return __CLASS__;
+       }
+
+       protected function doDBUpdates() {
+               $this->prefix = $this->getOption( 'prefix' );
+               $this->table = $this->getOption( 'table', null );
+               $this->assign = $this->getOption( 'assign' );
+
+               $this->cleanup(
+                       'revision', 'rev_id', 'rev_user', 'rev_user_text',
+                       [ 'rev_user' => 0 ], [ 'rev_timestamp', 'rev_id' ]
+               );
+               $this->cleanup(
+                       'archive', 'ar_id', 'ar_user', 'ar_user_text',
+                       [], [ 'ar_id' ]
+               );
+               $this->cleanup(
+                       'logging', 'log_id', 'log_user', 'log_user_text',
+                       [ 'log_user' => 0 ], [ 'log_timestamp', 'log_id' ]
+               );
+               $this->cleanup(
+                       'image', 'img_name', 'img_user', 'img_user_text',
+                       [ 'img_user' => 0 ], [ 'img_timestamp', 'img_name' ]
+               );
+               $this->cleanup(
+                       'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text',
+                       [], [ 'oi_name', 'oi_timestamp' ]
+               );
+               $this->cleanup(
+                       'filearchive', 'fa_id', 'fa_user', 'fa_user_text',
+                       [], [ 'fa_id' ]
+               );
+               $this->cleanup(
+                       'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text',
+                       [], [ 'ipb_id' ]
+               );
+               $this->cleanup(
+                       'recentchanges', 'rc_id', 'rc_user', 'rc_user_text',
+                       [], [ 'rc_id' ]
+               );
+
+               return true;
+       }
+
+       /**
+        * Calculate a "next" condition and progress display string
+        * @param IDatabase $dbw
+        * @param string[] $indexFields Fields in the index being ordered by
+        * @param object $row Database row
+        * @return array [ string $next, string $display ]
+        */
+       private function makeNextCond( $dbw, $indexFields, $row ) {
+               $next = '';
+               $display = [];
+               for ( $i = count( $indexFields ) - 1; $i >= 0; $i-- ) {
+                       $field = $indexFields[$i];
+                       $display[] = $field . '=' . $row->$field;
+                       $value = $dbw->addQuotes( $row->$field );
+                       if ( $next === '' ) {
+                               $next = "$field > $value";
+                       } else {
+                               $next = "$field > $value OR $field = $value AND ($next)";
+                       }
+               }
+               $display = join( ' ', array_reverse( $display ) );
+               return [ $next, $display ];
+       }
+
+       /**
+        * Cleanup a table
+        *
+        * @param string $table Table to migrate
+        * @param string|string[] $primaryKey Primary key of the table.
+        * @param string $idField User ID field name
+        * @param string $nameField User name field name
+        * @param array $conds Query conditions
+        * @param string[] $orderby Fields to order by
+        */
+       protected function cleanup(
+               $table, $primaryKey, $idField, $nameField, array $conds, array $orderby
+       ) {
+               if ( $this->table !== null && $this->table !== $table ) {
+                       return;
+               }
+
+               $primaryKey = (array)$primaryKey;
+               $pkFilter = array_flip( $primaryKey );
+               $this->output(
+                       "Beginning cleanup of $table\n"
+               );
+
+               $dbw = $this->getDB( DB_MASTER );
+               $next = '1=1';
+               $countAssigned = 0;
+               $countPrefixed = 0;
+               while ( true ) {
+                       // Fetch the rows needing update
+                       $res = $dbw->select(
+                               $table,
+                               array_merge( $primaryKey, [ $idField, $nameField ], $orderby ),
+                               array_merge( $conds, [ $next ] ),
+                               __METHOD__,
+                               [
+                                       'ORDER BY' => $orderby,
+                                       'LIMIT' => $this->mBatchSize,
+                               ]
+                       );
+                       if ( !$res->numRows() ) {
+                               break;
+                       }
+
+                       // Update the existing rows
+                       foreach ( $res as $row ) {
+                               $name = $row->$nameField;
+                               if ( $row->$idField || !User::isUsableName( $name ) ) {
+                                       continue;
+                               }
+
+                               $id = 0;
+                               if ( $this->assign ) {
+                                       $id = (int)User::idFromName( $name );
+                                       if ( !$id ) {
+                                               // See if any extension wants to create it.
+                                               if ( !isset( $this->triedCreations[$name] ) ) {
+                                                       $this->triedCreations[$name] = true;
+                                                       if ( !Hooks::run( 'ImportHandleUnknownUser', [ $name ] ) ) {
+                                                               $id = (int)User::idFromName( $name, User::READ_LATEST );
+                                                       }
+                                               }
+                                       }
+                               }
+                               if ( $id ) {
+                                       $set = [ $idField => $id ];
+                                       $counter = &$countAssigned;
+                               } else {
+                                       $set = [ $nameField => substr( $this->prefix . '>' . $name, 0, 255 ) ];
+                                       $counter = &$countPrefixed;
+                               }
+
+                               $dbw->update(
+                                       $table,
+                                       $set,
+                                       array_intersect_key( (array)$row, $pkFilter ) + [
+                                               $idField => 0,
+                                               $nameField => $name,
+                                       ],
+                                       __METHOD__
+                               );
+                               $counter += $dbw->affectedRows();
+                       }
+
+                       list( $next, $display ) = $this->makeNextCond( $dbw, $orderby, $row );
+                       $this->output( "... $display\n" );
+                       wfWaitForSlaves();
+               }
+
+               $this->output(
+                       "Completed cleanup, assigned $countAssigned and prefixed $countPrefixed row(s)\n"
+               );
+       }
+}
+
+$maintClass = "CleanupUsersWithNoId";
+require_once RUN_MAINTENANCE_IF_MAIN;
index cf0e7d8..b6bbc2a 100644 (file)
@@ -82,6 +82,12 @@ TEXT
                );
                $this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
                $this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true );
+               $this->addOption( 'username-interwiki', 'Use interwiki usernames with this prefix', false, true );
+               $this->addOption( 'no-local-users',
+                       'Treat all usernames as interwiki. ' .
+                       'The default is to assign edits to local users where they exist.',
+                       false, false
+               );
                $this->addArg( 'file', 'Dump file to import [else use stdin]', false );
        }
 
@@ -295,6 +301,12 @@ TEXT
                if ( $this->hasOption( 'no-updates' ) ) {
                        $importer->setNoUpdates( true );
                }
+               if ( $this->hasOption( 'username-prefix' ) ) {
+                       $importer->setUsernamePrefix(
+                               $this->getOption( 'username-prefix' ),
+                               !$this->hasOption( 'no-local-users' )
+                       );
+               }
                if ( $this->hasOption( 'rootpage' ) ) {
                        $statusRootPage = $importer->setTargetRootPage( $this->getOption( 'rootpage' ) );
                        if ( !$statusRootPage->isGood() ) {
index c422b51..70715e2 100644 (file)
@@ -160,7 +160,7 @@ class BlockTest extends MediaWikiLangTestCase {
                        'enableAutoblock' => true,
                        'hideName' => true,
                        'blockEmail' => true,
-                       'byText' => 'MetaWikiUser',
+                       'byText' => 'm>MetaWikiUser',
                ];
                $block = new Block( $blockOptions );
                $block->insert();
@@ -214,7 +214,7 @@ class BlockTest extends MediaWikiLangTestCase {
                        'enableAutoblock' => true,
                        'hideName' => true,
                        'blockEmail' => true,
-                       'byText' => 'MetaWikiUser',
+                       'byText' => 'Meta>MetaWikiUser',
                ];
                $block = new Block( $blockOptions );
 
@@ -230,8 +230,9 @@ class BlockTest extends MediaWikiLangTestCase {
                        'Correct blockee name'
                );
                $this->assertEquals( $userId, $block->getTarget()->getId(), 'Correct blockee id' );
-               $this->assertEquals( 'MetaWikiUser', $block->getBlocker(), 'Correct blocker name' );
-               $this->assertEquals( 'MetaWikiUser', $block->getByName(), 'Correct blocker name' );
+               $this->assertEquals( 'Meta>MetaWikiUser', $block->getBlocker()->getName(),
+                       'Correct blocker name' );
+               $this->assertEquals( 'Meta>MetaWikiUser', $block->getByName(), 'Correct blocker name' );
                $this->assertEquals( 0, $block->getBy(), 'Correct blocker id' );
        }
 
@@ -282,6 +283,7 @@ class BlockTest extends MediaWikiLangTestCase {
                        ],
                ];
 
+               $blocker = $this->getTestUser()->getUser();
                foreach ( $blockList as $insBlock ) {
                        $target = $insBlock['target'];
 
@@ -293,7 +295,7 @@ class BlockTest extends MediaWikiLangTestCase {
 
                        $block = new Block();
                        $block->setTarget( $target );
-                       $block->setBlocker( 'testblocker@global' );
+                       $block->setBlocker( $blocker );
                        $block->mReason = $insBlock['desc'];
                        $block->mExpiry = 'infinity';
                        $block->prevents( 'createaccount', $insBlock['ACDisable'] );
@@ -425,7 +427,7 @@ class BlockTest extends MediaWikiLangTestCase {
                        'reason' => 'test system block',
                        'timestamp' => wfTimestampNow(),
                        'expiry' => $this->db->getInfinity(),
-                       'byText' => 'MetaWikiUser',
+                       'byText' => 'MediaWiki default',
                        'systemBlock' => 'test',
                        'enableAutoblock' => true,
                ];
index 53d91c6..505653d 100644 (file)
@@ -220,4 +220,105 @@ EOF
                // @codingStandardsIgnoreEnd
        }
 
+       /**
+        * @dataProvider provideUnknownUserHandling
+        * @param bool $assign
+        * @param bool $create
+        */
+       public function testUnknownUserHandling( $assign, $create ) {
+               $hookId = -99;
+               $this->setMwGlobals( 'wgHooks', [
+                       'ImportHandleUnknownUser' => [ function ( $name ) use ( $assign, $create, &$hookId ) {
+                               if ( !$assign ) {
+                                       $this->fail( 'ImportHandleUnknownUser was called unexpectedly' );
+                               }
+
+                               $this->assertEquals( 'UserDoesNotExist', $name );
+                               if ( $create ) {
+                                       $user = User::createNew( $name );
+                                       $this->assertNotNull( $user );
+                                       $hookId = $user->getId();
+                                       return false;
+                               }
+                               return true;
+                       } ]
+               ] );
+
+               $user = $this->getTestUser()->getUser();
+
+               $n = ( $assign ? 1 : 0 ) + ( $create ? 2 : 0 );
+
+               // @codingStandardsIgnoreStart Generic.Files.LineLength
+               $source = $this->getDataSource( <<<EOF
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.10/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.10/ http://www.mediawiki.org/xml/export-0.10.xsd" version="0.10" xml:lang="en">
+  <page>
+    <title>TestImportPage</title>
+    <ns>0</ns>
+    <id>14</id>
+    <revision>
+      <id>15</id>
+      <timestamp>2016-01-01T0$n:00:00Z</timestamp>
+      <contributor>
+        <username>UserDoesNotExist</username>
+        <id>1</id>
+      </contributor>
+      <model>wikitext</model>
+      <format>text/x-wiki</format>
+      <text xml:space="preserve" bytes="3">foo</text>
+      <sha1>1e6gpc3ehk0mu2jqu8cg42g009s796b</sha1>
+    </revision>
+    <revision>
+      <id>16</id>
+      <timestamp>2016-01-01T0$n:00:01Z</timestamp>
+      <contributor>
+        <username>{$user->getName()}</username>
+        <id>{$user->getId()}</id>
+      </contributor>
+      <model>wikitext</model>
+      <format>text/x-wiki</format>
+      <text xml:space="preserve" bytes="3">bar</text>
+      <sha1>bjhlo6dxh5wivnszm93u4b78fheiy4t</sha1>
+    </revision>
+  </page>
+</mediawiki>
+EOF
+               );
+               // @codingStandardsIgnoreEnd
+
+               $importer = new WikiImporter( $source, MediaWikiServices::getInstance()->getMainConfig() );
+               $importer->setUsernamePrefix( 'Xxx', $assign );
+               $importer->doImport();
+
+               $db = wfGetDB( DB_MASTER );
+
+               $row = $db->selectRow(
+                       'revision',
+                       [ 'rev_user', 'rev_user_text' ],
+                       [ 'rev_timestamp' => "201601010{$n}0000" ],
+                       __METHOD__
+               );
+               $this->assertSame(
+                       $assign && $create ? 'UserDoesNotExist' : 'Xxx>UserDoesNotExist',
+                       $row->rev_user_text
+               );
+               $this->assertSame( $assign && $create ? $hookId : 0, (int)$row->rev_user );
+
+               $row = $db->selectRow(
+                       'revision',
+                       [ 'rev_user', 'rev_user_text' ],
+                       [ 'rev_timestamp' => "201601010{$n}0001" ],
+                       __METHOD__
+               );
+               $this->assertSame( ( $assign ? '' : 'Xxx>' ) . $user->getName(), $row->rev_user_text );
+               $this->assertSame( $assign ? $user->getId() : 0, (int)$row->rev_user );
+       }
+
+       public static function provideUnknownUserHandling() {
+               return [
+                       'no assign' => [ false, false ],
+                       'assign, no create' => [ true, false ],
+                       'assign, create' => [ true, true ],
+               ];
+       }
+
 }