Merge "Rewrite logstash key conflict warning from I6677dbf6"
[lhc/web/wiklou.git] / maintenance / refreshLinks.php
index 95a49d6..e7a4d06 100644 (file)
@@ -29,6 +29,9 @@ require_once __DIR__ . '/Maintenance.php';
  * @ingroup Maintenance
  */
 class RefreshLinks extends Maintenance {
+       /** @var int|bool */
+       protected $namespace = false;
+
        public function __construct() {
                parent::__construct();
                $this->addDescription( 'Refresh link tables' );
@@ -39,6 +42,7 @@ class RefreshLinks extends Maintenance {
                $this->addOption( 'e', 'Last page id to refresh', false, true );
                $this->addOption( 'dfn-chunk-size', 'Maximum number of existent IDs to check per ' .
                        'query, default 100000', false, true );
+               $this->addOption( 'namespace', 'Only fix pages in this namespace', false, true );
                $this->addArg( 'start', 'Page_id to start from, default 1', false );
                $this->setBatchSize( 100 );
        }
@@ -51,6 +55,12 @@ class RefreshLinks extends Maintenance {
                $start = (int)$this->getArg( 0 ) ?: null;
                $end = (int)$this->getOption( 'e' ) ?: null;
                $dfnChunkSize = (int)$this->getOption( 'dfn-chunk-size', 100000 );
+               $ns = $this->getOption( 'namespace' );
+               if ( $ns === null ) {
+                       $this->namespace = false;
+               } else {
+                       $this->namespace = (int)$ns;
+               }
                if ( !$this->hasOption( 'dfn-only' ) ) {
                        $new = $this->getOption( 'new-only', false );
                        $redir = $this->getOption( 'redirects-only', false );
@@ -62,6 +72,12 @@ class RefreshLinks extends Maintenance {
                }
        }
 
+       /**
+        * Build the optional namespace restriction for page queries.
+        *
+        * @return array [ 'page_namespace' => int ] when a namespace filter is
+        *  set, or an empty array when $this->namespace is false (no filter).
+        */
+       private function namespaceCond() {
+               // false is the "no filter" sentinel; 0 is a valid namespace id,
+               // so the comparison has to be strict.
+               if ( $this->namespace === false ) {
+                       return [];
+               }
+               return [ 'page_namespace' => $this->namespace ];
+       }
+
        /**
         * Do the actual link refreshing.
         * @param int|null $start Page_id to start from
@@ -74,7 +90,7 @@ class RefreshLinks extends Maintenance {
                $end = null, $redirectsOnly = false, $oldRedirectsOnly = false
        ) {
                $reportingInterval = 100;
-               $dbr = $this->getDB( DB_SLAVE );
+               $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
 
                if ( $start === null ) {
                        $start = 1;
@@ -92,7 +108,7 @@ class RefreshLinks extends Maintenance {
                                "page_is_redirect=1",
                                "rd_from IS NULL",
                                self::intervalCond( $dbr, 'page_id', $start, $end ),
-                       ];
+                       ] + $this->namespaceCond();
 
                        $res = $dbr->select(
                                [ 'page', 'redirect' ],
@@ -121,7 +137,7 @@ class RefreshLinks extends Maintenance {
                                [
                                        'page_is_new' => 1,
                                        self::intervalCond( $dbr, 'page_id', $start, $end ),
-                               ],
+                               ] + $this->namespaceCond(),
                                __METHOD__
                        );
                        $num = $res->numRows();
@@ -136,7 +152,7 @@ class RefreshLinks extends Maintenance {
                                if ( $redirectsOnly ) {
                                        $this->fixRedirect( $row->page_id );
                                } else {
-                                       self::fixLinksFromArticle( $row->page_id );
+                                       self::fixLinksFromArticle( $row->page_id, $this->namespace );
                                }
                        }
                } else {
@@ -167,7 +183,7 @@ class RefreshLinks extends Maintenance {
                                                $this->output( "$id\n" );
                                                wfWaitForSlaves();
                                        }
-                                       self::fixLinksFromArticle( $id );
+                                       self::fixLinksFromArticle( $id, $this->namespace );
                                }
                        }
                }
@@ -195,6 +211,10 @@ class RefreshLinks extends Maintenance {
                        $dbw->delete( 'redirect', [ 'rd_from' => $id ],
                                __METHOD__ );
 
+                       return;
+               } elseif ( $this->namespace !== false
+                       && !$page->getTitle()->inNamespace( $this->namespace )
+               ) {
                        return;
                }
 
@@ -222,14 +242,18 @@ class RefreshLinks extends Maintenance {
        /**
         * Run LinksUpdate for all links on a given page_id
         * @param int $id The page_id
+        * @param int|bool $ns Only fix links if it is in this namespace
         */
-       public static function fixLinksFromArticle( $id ) {
+       public static function fixLinksFromArticle( $id, $ns = false ) {
                $page = WikiPage::newFromID( $id );
 
                LinkCache::singleton()->clear();
 
                if ( $page === null ) {
                        return;
+               } elseif ( $ns !== false
+                       && !$page->getTitle()->inNamespace( $ns ) ) {
+                       return;
                }
 
                $content = $page->getContent( Revision::RAW );
@@ -258,14 +282,15 @@ class RefreshLinks extends Maintenance {
        ) {
                wfWaitForSlaves();
                $this->output( "Deleting illegal entries from the links tables...\n" );
-               $dbr = $this->getDB( DB_SLAVE );
+               $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
                do {
                        // Find the start of the next chunk. This is based only
                        // on existent page_ids.
                        $nextStart = $dbr->selectField(
                                'page',
                                'page_id',
-                               self::intervalCond( $dbr, 'page_id', $start, $end ),
+                               [ self::intervalCond( $dbr, 'page_id', $start, $end ) ]
+                               + $this->namespaceCond(),
                                __METHOD__,
                                [ 'ORDER BY' => 'page_id', 'OFFSET' => $chunkSize ]
                        );
@@ -299,7 +324,7 @@ class RefreshLinks extends Maintenance {
         */
        private function dfnCheckInterval( $start = null, $end = null, $batchSize = 100 ) {
                $dbw = $this->getDB( DB_MASTER );
-               $dbr = $this->getDB( DB_SLAVE );
+               $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
 
                $linksTables = [ // table name => page_id field
                        'pagelinks' => 'pl_from',