Localization update for he and fixing comment in en.
[lhc/web/wiklou.git] / maintenance / refreshLinks.inc
index f334895..b7d531c 100644 (file)
 <?php
 /**
  * @todo document
- * @package MediaWiki
- * @subpackage Maintenance
- * @version $Id$
+ * @file
+ * @ingroup Maintenance
  */
 
-/** */
-define( "REPORTING_INTERVAL", 50 );
-define( "PAUSE_INTERVAL", 50 );
+function refreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
+       global $wgUser, $wgParser, $wgUseTidy;
 
-function refreshLinks( $start ) {
-       global $wgUser, $wgTitle, $wgArticle, $wgEnablePersistentLC, $wgLinkCache, $wgOut;
+       $reportingInterval = 100;
+       $fname = 'refreshLinks';
+       $dbr = wfGetDB( DB_SLAVE );
+       $start = intval( $start );
 
-       $dbw =& wfGetDB( DB_MASTER );
-       
-       $end = $dbw->selectField( 'cur', 'max(cur_id)', false );
+       # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
+       $wgUser->setOption('math', MW_MATH_SOURCE);
+
+       # Don't generate extension images (e.g. Timeline)
+       if( method_exists( $wgParser, "clearTagHooks" ) ) {
+               $wgParser->clearTagHooks();
+       }
 
-       print("Refreshing link table. Starting from cur_id $start of $end.\n");
+       # Don't use HTML tidy
+       $wgUseTidy = false;
 
-       # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
-       $wgUser->setOption("math", 3);
-       
+       $what = $redirectsOnly ? "redirects" : "links";
 
-       for ($id = $start; $id <= $end; $id++) {
-               if ( !($id % REPORTING_INTERVAL) ) {
-                       print "$id\n";
+       if( $oldRedirectsOnly ) {
+               # This entire code path is cut-and-pasted from below.  Hurrah.
+               $res = $dbr->query(
+                       "SELECT page_id ".
+                       "FROM page ".
+                       "LEFT JOIN redirect ON page_id=rd_from ".
+                       "WHERE page_is_redirect=1 AND rd_from IS NULL AND ".
+                       ($end == 0 ? "page_id >= $start"
+                                  : "page_id BETWEEN $start AND $end"),
+                       $fname
+               );
+               $num = $dbr->numRows( $res );
+               print "Refreshing $num old redirects from $start...\n";
+
+               while( $row = $dbr->fetchObject( $res ) ) {
+                       if ( !( ++$i % $reportingInterval ) ) {
+                               print "$i\n";
+                               wfWaitForSlaves( $maxLag );
+                       }
+                       fixRedirect( $row->page_id );
                }
+       } elseif( $newOnly ) {
+               print "Refreshing $what from ";
+               $res = $dbr->select( 'page',
+                       array( 'page_id' ),
+                       array(
+                               'page_is_new' => 1,
+                               "page_id >= $start" ),
+                       $fname
+               );
+               $num = $dbr->numRows( $res );
+               print "$num new articles...\n";
 
-               if ( !($id % PAUSE_INTERVAL) ) {
-                       sleep(1);
+               $i = 0;
+               while ( $row = $dbr->fetchObject( $res ) ) {
+                       if ( !( ++$i % $reportingInterval ) ) {
+                               print "$i\n";
+                               wfWaitForSlaves( $maxLag );
+                       }
+                       if($redirectsOnly)
+                               fixRedirect( $row->page_id );
+                       else
+                               fixLinksFromArticle( $row->page_id );
                }
-               
-               $wgTitle = Title::newFromID( $id );
-               if ( is_null( $wgTitle ) ) {
-                       continue;
+       } else {
+               print "Refreshing $what table.\n";
+               if ( !$end ) {
+                       $end = $dbr->selectField( 'page', 'max(page_id)', false );
                }
-               $dbw->query("BEGIN");
+               print("Starting from page_id $start of $end.\n");
 
-               $wgArticle = new Article( $wgTitle );
-               $text = $wgArticle->getContent( true );
-               $wgLinkCache = new LinkCache;
-               $wgLinkCache->forUpdate( true );
+               for ($id = $start; $id <= $end; $id++) {
 
-               # Parse the text and replace links with placeholders
-               $wgOut->addWikiText( $text );
-               
-               # Look up the links in the DB and add them to the link cache
-               $wgOut->transformBuffer( RLH_FOR_UPDATE );
-               $wgOut->clearHTML();
-
-               if ( $wgEnablePersistentLC ) {
-                       $wgLinkCache->saveToLinkscc( $id, $dbw->strencode( $wgTitle->getPrefixedDBkey() ) );
+                       if ( !($id % $reportingInterval) ) {
+                               print "$id\n";
+                               wfWaitForSlaves( $maxLag );
+                       }
+                       if($redirectsOnly)
+                               fixRedirect( $id );
+                       else
+                               fixLinksFromArticle( $id );
                }
+       }
+}
+
+function fixRedirect( $id ){
+       global $wgTitle, $wgArticle;
+
+       $wgTitle = Title::newFromID( $id );
+       $dbw = wfGetDB( DB_MASTER );
+
+       if ( is_null( $wgTitle ) ) {
+               return;
+       }
+       $wgArticle = new Article($wgTitle);
+
+       $rt = $wgArticle->followRedirect();
+
+       if($rt == false || !is_object($rt))
+               return;
+
+       $wgArticle->updateRedirectOn($dbw,$rt);
+}
+
+function fixLinksFromArticle( $id ) {
+       global $wgTitle, $wgParser;
 
-               $linksUpdate = new LinksUpdate( $id, $wgTitle->getPrefixedDBkey() );
-               $linksUpdate->doDumbUpdate();
-               $linksUpdate->fixBrokenLinks();
-               $dbw->query("COMMIT");
+       $wgTitle = Title::newFromID( $id );
+       $dbw = wfGetDB( DB_MASTER );
+
+       $linkCache =& LinkCache::singleton();
+       $linkCache->clear();
+
+       if ( is_null( $wgTitle ) ) {
+               return;
+       }
+       $dbw->begin();
+
+       $revision = Revision::newFromTitle( $wgTitle );
+       if ( !$revision ) {
+               return;
        }
+
+       $options = new ParserOptions;
+       $parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
+       $update = new LinksUpdate( $wgTitle, $parserOutput, false );
+       $update->doUpdate();
+       $dbw->immediateCommit();
+}
+
+/*
+ * Removes non-existing links from pages from pagelinks, imagelinks,
+ * categorylinks, templatelinks and externallinks tables.
+ *
+ * @param $maxLag
+ * @param $batchSize The size of deletion batches
+ *
+ * @author Merlijn van Deen <valhallasw@arctus.nl>
+ */
+function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
+       wfWaitForSlaves( $maxLag );
+       
+       $dbw = wfGetDB( DB_MASTER );
+
+       $lb = wfGetLBFactory()->newMainLB();
+       $dbr = $lb->getConnection( DB_SLAVE );
+       $dbr->bufferResults( false );
+       
+       $linksTables = array( // table name => page_id field
+               'pagelinks' => 'pl_from',
+               'imagelinks' => 'il_from',
+               'categorylinks' => 'cl_from',
+               'templatelinks' => 'tl_from',
+               'externallinks' => 'el_from',
+       );
+       
+       foreach ( $linksTables as $table => $field ) {
+               print "Retrieving illegal entries from $table... ";
+               
+               // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
+               $results = $dbr->select( array( $table, 'page' ),
+                             $field,
+                             array('page_id' => null ),
+                             __METHOD__,
+                             'DISTINCT',
+                             array( 'page' => array( 'LEFT JOIN', "$field=page_id"))
+               );
+               
+               $counter = 0;
+               $list = array();
+               print "0..";
+               
+               foreach( $results as $row ) {
+                       $counter++;
+                       $list[] = $row->$field;
+                       if ( ( $counter % $batchSize ) == 0 ) {
+                               wfWaitForSlaves(5);
+                               $dbw->delete( $table, array( $field => $list ), __METHOD__ );
+                               
+                               print $counter . "..";
+                               $list = array();
+                       }
+               }
+               
+               print $counter;
+               if (count($list) > 0) {
+                       $dbw->delete( $table, array( $field => $list ), __METHOD__ );
+               }
+               
+               print "\n";
+       }
+       
+       $lb->closeAll();
 }
-?>