X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=maintenance%2FrefreshLinks.php;h=8fe4d3ca859633c12f4d7120aa100cca0c62efd1;hb=4801267d0801b7f323f691c16c784b6cc9b613f0;hp=6dccefb3358a8a07722e1502943699e695de2ca0;hpb=291efd30f2173070df8c1939f659759521f8fae6;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/refreshLinks.php b/maintenance/refreshLinks.php index 6dccefb335..8fe4d3ca85 100644 --- a/maintenance/refreshLinks.php +++ b/maintenance/refreshLinks.php @@ -1,5 +1,7 @@ addOption( 'old-redirects-only', 'Only fix redirects with no redirect table entry' ); $this->addOption( 'm', 'Maximum replication lag', false, true ); $this->addOption( 'e', 'Last page id to refresh', false, true ); - $this->addArg( 'start', 'Page_id to start from, default 1' ); + $this->addArg( 'start', 'Page_id to start from, default 1', false ); $this->setBatchSize( 100 ); } public function execute() { - if( !$this->hasOption( 'dfn-only' ) ) { + $max = $this->getOption( 'm', 0 ); + if ( !$this->hasOption( 'dfn-only' ) ) { $start = $this->getArg( 0, 1 ); $new = $this->getOption( 'new-only', false ); - $max = $this->getOption( 'm', false ); $end = $this->getOption( 'e', 0 ); $redir = $this->getOption( 'redirects-only', false ); $oldRedir = $this->getOption( 'old-redirects-only', false ); @@ -56,49 +58,60 @@ class RefreshLinks extends Maintenance { * @param $redirectsOnly bool Only fix redirects * @param $oldRedirectsOnly bool Only fix redirects without redirect entries */ - private function doRefreshLinks( $start, $newOnly = false, $maxLag = false, + private function doRefreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) { - global $wgUser, $wgParser, $wgUseTidy; + global $wgParser, $wgUseTidy; $reportingInterval = 100; $dbr = wfGetDB( DB_SLAVE ); $start = intval( $start ); - # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway) - $wgUser->setOption('math', MW_MATH_SOURCE); + // Give extensions a chance to optimize settings + wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) ); # Don't generate extension images (e.g. Timeline) - if( method_exists( $wgParser, "clearTagHooks" ) ) { - $wgParser->clearTagHooks(); - } + $wgParser->clearTagHooks(); # Don't use HTML tidy $wgUseTidy = false; $what = $redirectsOnly ? "redirects" : "links"; - if( $oldRedirectsOnly ) { + if ( $oldRedirectsOnly ) { # This entire code path is cut-and-pasted from below. Hurrah. - $res = $dbr->query( - "SELECT page_id ". - "FROM page ". - "LEFT JOIN redirect ON page_id=rd_from ". - "WHERE page_is_redirect=1 AND rd_from IS NULL AND ". - ($end == 0 ? "page_id >= $start" - : "page_id BETWEEN $start AND $end"), - __METHOD__ + + $conds = array( + "page_is_redirect=1", + "rd_from IS NULL" + ); + + if ( $end == 0 ) { + $conds[] = "page_id >= $start"; + } else { + $conds[] = "page_id BETWEEN $start AND $end"; + } + + $res = $dbr->select( + array( 'page', 'redirect' ), + 'page_id', + $conds, + __METHOD__, + array(), + array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) ) ); $num = $dbr->numRows( $res ); $this->output( "Refreshing $num old redirects from $start...\n" ); - foreach( $res as $row ) { + $i = 0; + + foreach ( $res as $row ) { if ( !( ++$i % $reportingInterval ) ) { $this->output( "$i\n" ); - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); } $this->fixRedirect( $row->page_id ); } - } elseif( $newOnly ) { + } elseif ( $newOnly ) { $this->output( "Refreshing $what from " ); $res = $dbr->select( 'page', array( 'page_id' ), @@ -109,45 +122,48 @@ class RefreshLinks extends Maintenance { ); $num = $dbr->numRows( $res ); $this->output( "$num new articles...\n" ); - + $i = 0; foreach ( $res as $row ) { if ( !( ++$i % $reportingInterval ) ) { $this->output( "$i\n" ); - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); } - if($redirectsOnly) + if ( $redirectsOnly ) { $this->fixRedirect( $row->page_id ); - else - $this->fixLinksFromArticle( $row->page_id ); + } else { + self::fixLinksFromArticle( $row->page_id ); + } } } else { if ( !$end ) { - $end = $dbr->selectField( 'page', 'max(page_id)', false ); + $maxPage = $dbr->selectField( 'page', 'max(page_id)', false ); + $maxRD = $dbr->selectField( 'redirect', 'max(rd_from)', false ); + $end = max( $maxPage, $maxRD ); } $this->output( "Refreshing redirects table.\n" ); $this->output( "Starting from page_id $start of $end.\n" ); - - for ($id = $start; $id <= $end; $id++) { - - if ( !($id % $reportingInterval) ) { + + for ( $id = $start; $id <= $end; $id++ ) { + + if ( !( $id % $reportingInterval ) ) { $this->output( "$id\n" ); - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); } $this->fixRedirect( $id ); } - if(!$redirectsOnly) { + if ( !$redirectsOnly ) { $this->output( "Refreshing links table.\n" ); $this->output( "Starting from page_id $start of $end.\n" ); - for ($id = $start; $id <= $end; $id++) { - - if ( !($id % $reportingInterval) ) { + for ( $id = $start; $id <= $end; $id++ ) { + + if ( !( $id % $reportingInterval ) ) { $this->output( "$id\n" ); - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); } - $this->fixLinksFromArticle( $id ); + self::fixLinksFromArticle( $id ); } } } @@ -157,51 +173,56 @@ class RefreshLinks extends Maintenance { * Update the redirect entry for a given page * @param $id int The page_id of the redirect */ - private function fixRedirect( $id ){ - global $wgTitle, $wgArticle; - - $wgTitle = Title::newFromID( $id ); + private function fixRedirect( $id ) { + $title = Title::newFromID( $id ); $dbw = wfGetDB( DB_MASTER ); - - if ( is_null( $wgTitle ) ) { + + if ( is_null( $title ) ) { + // This page doesn't exist (any more) + // Delete any redirect table entry for it + $dbw->delete( 'redirect', array( 'rd_from' => $id ), + __METHOD__ ); return; } - $wgArticle = new Article($wgTitle); - - $rt = $wgArticle->followRedirect(); - - if($rt == false || !is_object($rt)) - return; - - $wgArticle->updateRedirectOn($dbw,$rt); + $article = new Article( $title ); + + $rt = $article->followRedirect(); + + if ( !$rt || !is_object( $rt ) ) { + // $title is not a redirect + // Delete any redirect table entry for it + $dbw->delete( 'redirect', array( 'rd_from' => $id ), + __METHOD__ ); + } else { + $article->updateRedirectOn( $dbw, $rt ); + } } /** * Run LinksUpdate for all links on a given page_id * @param $id int The page_id */ - private function fixLinksFromArticle( $id ) { - global $wgTitle, $wgParser; + public static function fixLinksFromArticle( $id ) { + global $wgParser; - $wgTitle = Title::newFromID( $id ); + $title = Title::newFromID( $id ); $dbw = wfGetDB( DB_MASTER ); - $linkCache =& LinkCache::singleton(); - $linkCache->clear(); + LinkCache::singleton()->clear(); - if ( is_null( $wgTitle ) ) { + if ( is_null( $title ) ) { return; } $dbw->begin(); - $revision = Revision::newFromTitle( $wgTitle ); + $revision = Revision::newFromTitle( $title ); if ( !$revision ) { return; } $options = new ParserOptions; - $parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() ); - $update = new LinksUpdate( $wgTitle, $parserOutput, false ); + $parserOutput = $wgParser->parse( $revision->getText(), $title, $options, true, true, $revision->getId() ); + $update = new LinksUpdate( $title, $parserOutput, false ); $update->doUpdate(); $dbw->commit(); } @@ -216,7 +237,7 @@ class RefreshLinks extends Maintenance { * @author Merlijn van Deen */ private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) { - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); $dbw = wfGetDB( DB_MASTER ); @@ -238,21 +259,21 @@ class RefreshLinks extends Maintenance { // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL; $results = $dbr->select( array( $table, 'page' ), $field, - array('page_id' => null ), + array( 'page_id' => null ), __METHOD__, 'DISTINCT', - array( 'page' => array( 'LEFT JOIN', "$field=page_id")) + array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) ) ); $counter = 0; $list = array(); $this->output( "0.." ); - foreach( $results as $row ) { + foreach ( $results as $row ) { $counter++; $list[] = $row->$field; if ( ( $counter % $batchSize ) == 0 ) { - wfWaitForSlaves(5); + wfWaitForSlaves(); $dbw->delete( $table, array( $field => $list ), __METHOD__ ); $this->output( $counter . ".." ); @@ -260,7 +281,7 @@ class RefreshLinks extends Maintenance { } } $this->output( $counter ); - if (count($list) > 0) { + if ( count( $list ) > 0 ) { $dbw->delete( $table, array( $field => $list ), __METHOD__ ); } $this->output( "\n" ); @@ -270,4 +291,4 @@ class RefreshLinks extends Maintenance { } $maintClass = 'RefreshLinks'; -require_once( DO_MAINTENANCE ); +require_once( RUN_MAINTENANCE_IF_MAIN );