Per Raymond, follow-up r95321: LinkSearch is in core too
[lhc/web/wiklou.git] / maintenance / refreshLinks.php
index 863dd02..1d33a8e 100644 (file)
@@ -1,5 +1,7 @@
 <?php
 /**
+ * Refresh link tables.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -18,7 +20,7 @@
  * @ingroup Maintenance
  */
 
-require_once( dirname(__FILE__) . '/Maintenance.php' );
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
 
 class RefreshLinks extends Maintenance {
        public function __construct() {
@@ -35,10 +37,10 @@ class RefreshLinks extends Maintenance {
        }
 
        public function execute() {
-               if( !$this->hasOption( 'dfn-only' ) ) {
+               $max = $this->getOption( 'm', 0 );
+               if ( !$this->hasOption( 'dfn-only' ) ) {
                        $start = $this->getArg( 0, 1 );
                        $new = $this->getOption( 'new-only', false );
-                       $max = $this->getOption( 'm', false );
                        $end = $this->getOption( 'e', 0 );
                        $redir = $this->getOption( 'redirects-only', false );
                        $oldRedir = $this->getOption( 'old-redirects-only', false );
@@ -56,49 +58,60 @@ class RefreshLinks extends Maintenance {
         * @param $redirectsOnly bool Only fix redirects
         * @param $oldRedirectsOnly bool Only fix redirects without redirect entries
         */
-       private function doRefreshLinks( $start, $newOnly = false, $maxLag = false, 
+       private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
                                                $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
-               global $wgUser, $wgParser, $wgUseTidy;
+               global $wgParser, $wgUseTidy;
 
                $reportingInterval = 100;
                $dbr = wfGetDB( DB_SLAVE );
                $start = intval( $start );
 
-               # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
-               $wgUser->setOption('math', MW_MATH_SOURCE);
+               // Give extensions a chance to optimize settings
+               wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );
 
                # Don't generate extension images (e.g. Timeline)
-               if( method_exists( $wgParser, "clearTagHooks" ) ) {
-                       $wgParser->clearTagHooks();
-               }
+               $wgParser->clearTagHooks();
 
                # Don't use HTML tidy
                $wgUseTidy = false;
 
                $what = $redirectsOnly ? "redirects" : "links";
 
-               if( $oldRedirectsOnly ) {
+               if ( $oldRedirectsOnly ) {
                        # This entire code path is cut-and-pasted from below.  Hurrah.
-                       $res = $dbr->query(
-                               "SELECT page_id ".
-                               "FROM page ".
-                               "LEFT JOIN redirect ON page_id=rd_from ".
-                               "WHERE page_is_redirect=1 AND rd_from IS NULL AND ".
-                               ($end == 0 ? "page_id >= $start"
-                                                  : "page_id BETWEEN $start AND $end"),
-                               __METHOD__
+
+                       $conds = array(
+                               "page_is_redirect=1",
+                               "rd_from IS NULL"
+                       );
+
+                       if ( $end == 0 ) {
+                               $conds[] = "page_id >= $start";
+                       } else {
+                               $conds[] = "page_id BETWEEN $start AND $end";
+                       }
+
+                       $res = $dbr->select(
+                               array( 'page', 'redirect' ),
+                               'page_id',
+                               $conds,
+                               __METHOD__,
+                               array(),
+                               array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
                        );
                        $num = $dbr->numRows( $res );
                        $this->output( "Refreshing $num old redirects from $start...\n" );
 
-                       foreach( $res as $row ) {
+                       $i = 0;
+
+                       foreach ( $res as $row ) {
                                if ( !( ++$i % $reportingInterval ) ) {
                                        $this->output( "$i\n" );
-                                       wfWaitForSlaves( $maxLag );
+                                       wfWaitForSlaves();
                                }
                                $this->fixRedirect( $row->page_id );
                        }
-               } elseif( $newOnly ) {
+               } elseif ( $newOnly ) {
                        $this->output( "Refreshing $what from " );
                        $res = $dbr->select( 'page',
                                array( 'page_id' ),
@@ -109,17 +122,18 @@ class RefreshLinks extends Maintenance {
                        );
                        $num = $dbr->numRows( $res );
                        $this->output( "$num new articles...\n" );
-       
+
                        $i = 0;
                        foreach ( $res as $row ) {
                                if ( !( ++$i % $reportingInterval ) ) {
                                        $this->output( "$i\n" );
-                                       wfWaitForSlaves( $maxLag );
+                                       wfWaitForSlaves();
                                }
-                               if($redirectsOnly)
+                               if ( $redirectsOnly ) {
                                        $this->fixRedirect( $row->page_id );
-                               else
-                                       $this->fixLinksFromArticle( $row->page_id );
+                               } else {
+                                       self::fixLinksFromArticle( $row->page_id );
+                               }
                        }
                } else {
                        if ( !$end ) {
@@ -129,27 +143,27 @@ class RefreshLinks extends Maintenance {
                        }
                        $this->output( "Refreshing redirects table.\n" );
                        $this->output( "Starting from page_id $start of $end.\n" );
-       
-                       for ($id = $start; $id <= $end; $id++) {
-       
-                               if ( !($id % $reportingInterval) ) {
+
+                       for ( $id = $start; $id <= $end; $id++ ) {
+
+                               if ( !( $id % $reportingInterval ) ) {
                                        $this->output( "$id\n" );
-                                       wfWaitForSlaves( $maxLag );
+                                       wfWaitForSlaves();
                                }
                                $this->fixRedirect( $id );
                        }
 
-                       if(!$redirectsOnly) {
+                       if ( !$redirectsOnly ) {
                                $this->output( "Refreshing links table.\n" );
                                $this->output( "Starting from page_id $start of $end.\n" );
 
-                               for ($id = $start; $id <= $end; $id++) {
-       
-                                       if ( !($id % $reportingInterval) ) {
+                               for ( $id = $start; $id <= $end; $id++ ) {
+
+                                       if ( !( $id % $reportingInterval ) ) {
                                                $this->output( "$id\n" );
-                                               wfWaitForSlaves( $maxLag );
+                                               wfWaitForSlaves();
                                        }
-                                       $this->fixLinksFromArticle( $id );
+                                       self::fixLinksFromArticle( $id );
                                }
                        }
                }
@@ -159,30 +173,28 @@ class RefreshLinks extends Maintenance {
         * Update the redirect entry for a given page
         * @param $id int The page_id of the redirect
         */
-       private function fixRedirect( $id ){
-               global $wgTitle, $wgArticle;
-       
-               $wgTitle = Title::newFromID( $id );
+       private function fixRedirect( $id ) {
+               $title = Title::newFromID( $id );
                $dbw = wfGetDB( DB_MASTER );
-       
-               if ( is_null( $wgTitle ) ) {
+
+               if ( is_null( $title ) ) {
                        // This page doesn't exist (any more)
                        // Delete any redirect table entry for it
                        $dbw->delete( 'redirect', array( 'rd_from' => $id ),
                                __METHOD__ );
                        return;
                }
-               $wgArticle = new Article($wgTitle);
-       
-               $rt = $wgArticle->followRedirect();
-       
-               if($rt == false || !is_object($rt)) {
-                       // $wgTitle is not a redirect
+               $article = new Article( $title );
+
+               $rt = $article->followRedirect();
+
+               if ( !$rt || !is_object( $rt ) ) {
+                       // $title is not a redirect
                        // Delete any redirect table entry for it
                        $dbw->delete( 'redirect', array( 'rd_from' => $id ),
                                __METHOD__ );
                } else {
-                       $wgArticle->updateRedirectOn($dbw,$rt);
+                       $article->updateRedirectOn( $dbw, $rt );
                }
        }
 
@@ -190,35 +202,35 @@ class RefreshLinks extends Maintenance {
         * Run LinksUpdate for all links on a given page_id
         * @param $id int The page_id
         */
-       private function fixLinksFromArticle( $id ) {
-               global $wgTitle, $wgParser;
+       public static function fixLinksFromArticle( $id ) {
+               global $wgParser;
 
-               $wgTitle = Title::newFromID( $id );
+               $title = Title::newFromID( $id );
                $dbw = wfGetDB( DB_MASTER );
 
-               $linkCache =& LinkCache::singleton();
-               $linkCache->clear();
+               LinkCache::singleton()->clear();
 
-               if ( is_null( $wgTitle ) ) {
+               if ( is_null( $title ) ) {
                        return;
                }
-               $dbw->begin();
 
-               $revision = Revision::newFromTitle( $wgTitle );
+               $revision = Revision::newFromTitle( $title );
                if ( !$revision ) {
                        return;
                }
 
+               $dbw->begin();
+
                $options = new ParserOptions;
-               $parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
-               $update = new LinksUpdate( $wgTitle, $parserOutput, false );
+               $parserOutput = $wgParser->parse( $revision->getText(), $title, $options, true, true, $revision->getId() );
+               $update = new LinksUpdate( $title, $parserOutput, false );
                $update->doUpdate();
                $dbw->commit();
        }
 
-       /*
+       /**
         * Removes non-existing links from pages from pagelinks, imagelinks,
-        * categorylinks, templatelinks and externallinks tables.
+        * categorylinks, templatelinks, externallinks, interwikilinks, langlinks and redirect tables.
         *
         * @param $maxLag
         * @param $batchSize The size of deletion batches
@@ -226,7 +238,7 @@ class RefreshLinks extends Maintenance {
         * @author Merlijn van Deen <valhallasw@arctus.nl>
         */
        private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
-               wfWaitForSlaves( $maxLag );
+               wfWaitForSlaves();
 
                $dbw = wfGetDB( DB_MASTER );
 
@@ -240,6 +252,10 @@ class RefreshLinks extends Maintenance {
                        'categorylinks' => 'cl_from',
                        'templatelinks' => 'tl_from',
                        'externallinks' => 'el_from',
+                       'iwlinks' => 'iwl_from',
+                       'langlinks' => 'll_from',
+                       'redirect' => 'rd_from',
+                       'page_props' => 'pp_page',
                );
 
                foreach ( $linksTables as $table => $field ) {
@@ -248,21 +264,20 @@ class RefreshLinks extends Maintenance {
                        // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
                        $results = $dbr->select( array( $table, 'page' ),
                                                  $field,
-                                                 array('page_id' => null ),
+                                                 array( 'page_id' => null ),
                                                  __METHOD__,
                                                  'DISTINCT',
-                                                 array( 'page' => array( 'LEFT JOIN', "$field=page_id"))
+                                                 array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
                        );
 
                        $counter = 0;
                        $list = array();
                        $this->output( "0.." );
-
-                       foreach( $results as $row ) {
+                       foreach ( $results as $row ) {
                                $counter++;
                                $list[] = $row->$field;
                                if ( ( $counter % $batchSize ) == 0 ) {
-                                       wfWaitForSlaves(5);
+                                       wfWaitForSlaves();
                                        $dbw->delete( $table, array( $field => $list ), __METHOD__ );
 
                                        $this->output( $counter . ".." );
@@ -270,7 +285,7 @@ class RefreshLinks extends Maintenance {
                                }
                        }
                        $this->output( $counter );
-                       if (count($list) > 0) {
+                       if ( count( $list ) > 0 ) {
                                $dbw->delete( $table, array( $field => $list ), __METHOD__ );
                        }
                        $this->output( "\n" );
@@ -280,4 +295,4 @@ class RefreshLinks extends Maintenance {
 }
 
 $maintClass = 'RefreshLinks';
-require_once( DO_MAINTENANCE );
+require_once( RUN_MAINTENANCE_IF_MAIN );