Add removeInvalidEmails.php maintenance script
author Kunal Mehta <legoktm@gmail.com>
Tue, 2 Dec 2014 20:40:23 +0000 (12:40 -0800)
committer Kunal Mehta <legoktm@gmail.com>
Wed, 3 Dec 2014 20:24:33 +0000 (12:24 -0800)
The script scans the user table and removes emails
that are technically invalid, i.e. ones that don't pass
Sanitizer::validateEmail() and wouldn't be allowed
today. Confirmed emails are skipped entirely since
they had to be valid at some point.

Bug: T76512
Change-Id: I3cc6396ff6d8b738846b7716b4b0cddc9bf9e1a4

maintenance/removeInvalidEmails.php [new file with mode: 0644]

diff --git a/maintenance/removeInvalidEmails.php b/maintenance/removeInvalidEmails.php
new file mode 100644 (file)
index 0000000..7ff69a1
--- /dev/null
@@ -0,0 +1,78 @@
+<?php
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * A script to remove emails that are invalid from
+ * the user_email column of the user table. Emails
+ * are validated before users can add them, but
+ * this was not always the case so older users may
+ * have invalid ones.
+ *
+ * Only rows with a non-empty user_email and a NULL
+ * user_email_authenticated are examined. An address
+ * is treated as invalid when its trimmed value is
+ * rejected by Sanitizer::validateEmail().
+ *
+ * By default it does a dry-run, pass --commit
+ * to actually update the database.
+ */
+class RemoveInvalidEmails extends Maintenance {
+
+       /** @var bool Whether --commit was passed, i.e. whether to write to the database */
+       private $commit = false;
+
+       /**
+        * Register the --commit flag and set the batch size to 500 rows.
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->addOption( 'commit', 'Whether to actually update the database', false, false );
+               $this->setBatchSize( 500 );
+       }
+
+       /**
+        * Walk the user table in batches ordered by user_id, report every
+        * unconfirmed email that fails validation and, when --commit is
+        * set, blank those emails and invalidate the affected users' caches.
+        */
+       public function execute() {
+               $this->commit = $this->hasOption( 'commit' );
+               $dbr = $this->getDB( DB_SLAVE );
+               $dbw = $this->getDB( DB_MASTER );
+               $lastId = 0;
+               do {
+                       $rows = $dbr->select(
+                               'user',
+                               array( 'user_id', 'user_email' ),
+                               array(
+                                       'user_id > ' . $dbr->addQuotes( $lastId ),
+                                       // Quote the empty string through the database layer:
+                                       // a bare "" is an identifier, not a string literal,
+                                       // in ANSI SQL / PostgreSQL.
+                                       'user_email != ' . $dbr->addQuotes( '' ),
+                                       'user_email_authenticated IS NULL'
+                               ),
+                               __METHOD__,
+                               array(
+                                       // Paging on "user_id > $lastId" is only sound when
+                                       // each batch holds the lowest remaining ids.
+                                       'ORDER BY' => 'user_id',
+                                       'LIMIT' => $this->mBatchSize
+                               )
+                       );
+                       $count = $rows->numRows();
+                       $badIds = array();
+                       foreach ( $rows as $row ) {
+                               if ( !Sanitizer::validateEmail( trim( $row->user_email ) ) ) {
+                                       $this->output( "Found bad email: {$row->user_email} for user #{$row->user_id}\n" );
+                                       $badIds[] = $row->user_id;
+                               }
+                               // Advance the cursor for every row, valid or not. If it only
+                               // moved on invalid rows, a batch without any invalid email
+                               // would be fetched again and again and the loop would never end.
+                               if ( $row->user_id > $lastId ) {
+                                       $lastId = $row->user_id;
+                               }
+                       }
+
+                       if ( $badIds ) {
+                               $badCount = count( $badIds );
+                               if ( $this->commit ) {
+                                       $this->output( "Removing $badCount emails from the database.\n" );
+                                       $dbw->update(
+                                               'user',
+                                               array( 'user_email' => '' ),
+                                               array( 'user_id' => $badIds ),
+                                               __METHOD__
+                                       );
+                                       // Drop cached user data so the blanked email is picked up.
+                                       foreach ( $badIds as $badId ) {
+                                               User::newFromId( $badId )->invalidateCache();
+                                       }
+                                       wfWaitForSlaves();
+                               } else {
+                                       $this->output( "Would have removed $badCount emails from the database.\n" );
+                               }
+                       }
+               } while ( $count !== 0 );
+               $this->output( "Done.\n" );
+       }
+}
+
+$maintClass = 'RemoveInvalidEmails';
+require_once RUN_MAINTENANCE_IF_MAIN;