<?php
/**
* @todo document
- * @package MediaWiki
- * @subpackage Maintenance
+ * @addtogroup Maintenance
*/
/** */
# REPORTING_INTERVAL: how many pages are handled between progress reports; the
# added refreshLinks() code also calls wfWaitForSlaves() at the same points.
# PAUSE_INTERVAL (removed here) drove the sleep(1) throttle in the old loop.
-define( "REPORTING_INTERVAL", 50 );
-define( "PAUSE_INTERVAL", 50 );
+define( "REPORTING_INTERVAL", 100 );
+#define( "REPORTING_INTERVAL", 1 );
/**
 * Refresh the links of a range of pages by calling fixLinksFromArticle() on
 * each of them. ('-' lines below are the old implementation, '+' lines the
 * new one; this comment describes the new signature.)
 *
 * Before processing, the new code switches the user's math option to
 * MW_MATH_SOURCE, empties the parser's tag hooks and turns off image
 * resizing and Tidy.
 *
 * @param $start   Starting page_id; coerced with intval().
 * @param $newOnly If true, only rows of `page` with page_is_new = 1 and
 *                 page_id > $start are processed; otherwise every id from
 *                 $start to $end (inclusive) is tried.
 * @param $maxLag  Passed unchanged to wfWaitForSlaves(), which is called once
 *                 every REPORTING_INTERVAL pages.
 * @param $end     Last page_id for the full-range mode; when it is falsy,
 *                 max(page_id) is read from the slave connection. Not used
 *                 when $newOnly is set.
 */
-function refreshLinks( $start ) {
- global $wgUser, $wgTitle, $wgArticle, $wgEnablePersistentLC, $wgLinkCache, $wgOut;
+function refreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0 ) {
+ global $wgUser, $wgParser, $wgUseImageResize, $wgUseTidy;
- $dbw =& wfGetDB( DB_MASTER );
-
- $end = $dbw->selectField( 'cur', 'max(cur_id)', false );
-
- print("Refreshing link table. Starting from cur_id $start of $end.\n");
+ $fname = 'refreshLinks';
+ $dbr = wfGetDB( DB_SLAVE );
+ $start = intval( $start );
# Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
- $wgUser->setOption("math", 3);
-
+ $wgUser->setOption('math', MW_MATH_SOURCE);
- for ($id = $start; $id <= $end; $id++) {
- if ( !($id % REPORTING_INTERVAL) ) {
- print "$id\n";
- }
+ # Don't generate extension images (e.g. Timeline)
+ $wgParser->mTagHooks = array();
+
+ # Don't generate thumbnail images
+ $wgUseImageResize = false;
+ $wgUseTidy = false;
+
# New-only mode: walk the pages still flagged page_is_new.
# NOTE(review): the condition is "page_id > $start", so $start itself is
# excluded here, while the full-range loop further down includes it - confirm
# that the difference is intended.
+ if ( $newOnly ) {
+ print "Refreshing links from ";
+ $res = $dbr->select( 'page',
+ array( 'page_id' ),
+ array(
+ 'page_is_new' => 1,
+ "page_id > $start" ),
+ $fname
+ );
+ $num = $dbr->numRows( $res );
+ print "$num new articles...\n";
+
+ $i = 0;
+ while ( $row = $dbr->fetchObject( $res ) ) {
+ if ( !( ++$i % REPORTING_INTERVAL ) ) {
+ print "$i\n";
+ wfWaitForSlaves( $maxLag );
+ }
- if ( !($id % PAUSE_INTERVAL) ) {
- sleep(1);
+ fixLinksFromArticle( $row->page_id );
}
-
- $wgTitle = Title::newFromID( $id );
- if ( is_null( $wgTitle ) ) {
- continue;
+ } else {
# Full-range mode: try every id from $start to $end; ids for which no title
# exists are skipped inside fixLinksFromArticle().
+ print "Refreshing link table.\n";
+ if ( !$end ) {
+ $end = $dbr->selectField( 'page', 'max(page_id)', false );
}
- $dbw->query("BEGIN");
-
- $wgArticle = new Article( $wgTitle );
- $text = $wgArticle->getContent( true );
- $wgLinkCache = new LinkCache;
- $wgLinkCache->forUpdate( true );
-
- global $wgLinkHolders;
- $wgLinkHolders = array(
- 'namespaces' => array(),
- 'dbkeys' => array(),
- 'queries' => array(),
- 'texts' => array(),
- 'titles' => array()
- );
+ print("Starting from page_id $start of $end.\n");
+ for ($id = $start; $id <= $end; $id++) {
- # Parse the text and replace links with placeholders
- $wgOut->addWikiText( $text );
-
- # Look up the links in the DB and add them to the link cache
- $wgOut->transformBuffer( RLH_FOR_UPDATE );
- $wgOut->clearHTML();
+ if ( !($id % REPORTING_INTERVAL) ) {
+ print "$id\n";
+ wfWaitForSlaves( $maxLag );
+ }
+ fixLinksFromArticle( $id );
+ }
+ }
+}
- if ( $wgEnablePersistentLC ) {
- $wgLinkCache->saveToLinkscc( $id, $dbw->strencode( $wgTitle->getPrefixedDBkey() ) );
+/**
+ * Re-parse the current revision of one page and run a LinksUpdate for it.
+ *
+ * Side effects on every call, even when nothing is updated: the global
+ * $wgTitle is replaced by the title looked up for $id (possibly null), and
+ * the LinkCache singleton is cleared.
+ *
+ * Returns without opening a transaction when no title exists for $id or
+ * when the title has no revision.
+ *
+ * @param $id page_id of the page to process
+ */
+function fixLinksFromArticle( $id ) {
+	global $wgTitle, $wgParser;
+
+	$wgTitle = Title::newFromID( $id );
+	$dbw = wfGetDB( DB_MASTER );
+
+	$linkCache =& LinkCache::singleton();
+	$linkCache->clear();
+
+	if ( is_null( $wgTitle ) ) {
+		return;
+	}
+
+	# Look the revision up before opening the transaction, so that the early
+	# return below can never leave a begin() without a matching commit.
+	$revision = Revision::newFromTitle( $wgTitle );
+	if ( !$revision ) {
+		return;
+	}
+
+	$dbw->begin();
+
+	$options = new ParserOptions;
+	$parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
+	$update = new LinksUpdate( $wgTitle, $parserOutput, false );
+	$update->doUpdate();
+	$dbw->immediateCommit();
+}
+
/**
 * Delete rows from the link tables whose "from" page id has no matching row
 * in the page table.
 *
 * Handles pagelinks, imagelinks, categorylinks, templatelinks and
 * externallinks in turn, printing the affected-row count for each table.
 * ('-' lines inside the body are removed old code, not part of this function.)
 *
 * @param $maxLag Passed to wfWaitForSlaves(), which is called once, before
 *                any table is processed.
 */
+function deleteLinksFromNonexistent( $maxLag = 0 ) {
+ $fname = 'deleteLinksFromNonexistent';
+
+ wfWaitForSlaves( $maxLag );
+
+ $dbw = wfGetDB( DB_WRITE );
+
# Map of link table name => column holding the id of the linking page.
+ $linksTables = array(
+ 'pagelinks' => 'pl_from',
+ 'imagelinks' => 'il_from',
+ 'categorylinks' => 'cl_from',
+ 'templatelinks' => 'tl_from',
+ 'externallinks' => 'el_from',
+ );
+
+ $page = $dbw->tableName( 'page' );
+
+
+ foreach ( $linksTables as $table => $field ) {
# If the connection does not answer a ping, retry every 10 seconds until it
# does before issuing the next DELETE.
+ if ( !$dbw->ping() ) {
+ print "DB disconnected, reconnecting...";
+ while ( !$dbw->ping() ) {
+ print ".";
+ sleep(10);
+ }
+ print "\n";
}
- $linksUpdate = new LinksUpdate( $id, $wgTitle->getPrefixedDBkey() );
- /** FIXME
- * In ./includes/LinksUpdate.php doDumbUpdate is commented with:
- * "Old inefficient update function"
- * Probably need to call doUpdate instead.
- */
- $linksUpdate->doDumbUpdate();
- $linksUpdate->fixBrokenLinks();
- $dbw->query("COMMIT");
# The LEFT JOIN leaves page_id NULL for link rows with no page row, and only
# those rows are deleted.
# NOTE(review): "DELETE tbl FROM tbl LEFT JOIN ..." looks like MySQL's
# multi-table DELETE syntax - confirm no other database backend has to run this.
+ $pTable = $dbw->tableName( $table );
+ $sql = "DELETE $pTable FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL";
+
+ print "Deleting $table from non-existent articles...";
+ $dbw->query( $sql, $fname );
+ print " fixed " .$dbw->affectedRows() . " row(s)\n";
}
}
+
?>