# See rebuildlinks.php, for example.
#
-# Turn this on if you've got memory to burn
-$wgUseMemoryTables = false;
-
# Buffer this many rows before inserting them all in one sweep. More
# than about 1000 will probably not increase speed significantly on
# most setups.
function rebuildLinkTables()
{
error_reporting (E_ALL);
- global $wgLang, $wgUseMemoryTables, $wgLinkCache, $rowbuf_size;
+ global $wgLang, $wgLinkCache, $rowbuf_size;
print "This script may take several hours to complete. If you abort during that time,\n";
print "your wiki will be in an inconsistent state. If you are going to abort, this is\n";
print "Setting AUTOCOMMIT=1\n";
wfQuery("SET SESSION AUTOCOMMIT=1", DB_WRITE);
+ print "Extracting often used data from cur (may take a few minutes)\n";
+ $sql = "CREATE TEMPORARY TABLE cur_fast SELECT cur_namespace, cur_title, cur_id FROM cur";
+ wfQuery( $sql, DB_WRITE );
+ $sql = "ALTER TABLE cur_fast ADD INDEX(cur_namespace, cur_title)";
+ wfQuery( $sql, DB_WRITE );
+
print "Locking tables\n";
- $sql = "LOCK TABLES cur READ, interwiki READ, user_newtalk READ, " .
+ $sql = "LOCK TABLES cur READ, cur_fast READ, interwiki READ, user_newtalk READ, " .
"links WRITE, brokenlinks WRITE, imagelinks WRITE";
wfQuery( $sql, DB_WRITE );
+
print "Deleting old data in links table.\n";
$sql = "DELETE FROM links";
wfQuery( $sql, DB_WRITE );
$numlinks = preg_match_all( "/\\[\\[([{$tc}]+)(]|\\|)/", $text,
$m, PREG_PATTERN_ORDER );
- $seen_links = array(); // seen links in this article
+ $seen_dbtitles = array(); // seen links (normalized and with ns, see below)
$titles_ready_for_insertion = array();
$titles_needing_curdata = array();
$titles_needing_curdata_pos = array();
for ( $i = 0 ; $i < $numlinks; ++$i ) {
$link = $m[1][$i];
-
- // We're only interested in the link once per article
- if( isset( $seen_links[$link] ) )
- continue;
- $seen_links[$link] = 1;
-
if( preg_match( '/^(http|https|ftp|mailto|news):/', $m[1][$i] ) ) {
# an URL link; not for us!
continue;
# FIXME: Handle subpage links
$nt = $titleCache->get( $link );
if( $nt != false ){
- $titles_ready_for_insertion[] = $nt;
+ // Only process each unique link once per page
+ $nt_key = $nt->getDBkey() . $nt->getNamespace();
+ if( isset( $seen_dbtitles[$nt_key] ) )
+ continue;
+ $seen_dbtitles[$nt_key] = 1;
+
+ $titles_ready_for_insertion[] = $nt;
} else {
$nt = Title::newFromText( $link );
if (! $nt) {
- print "\nerror in '$ns:{$from_full_title}': '$link'\n";
+ // Invalid link, probably something like "[[ ]]"
continue;
}
+
+ // Only process each unique link once per page
+ $nt_key = $nt->getDBkey() . $nt->getNamespace();
+ if( isset( $seen_dbtitles[$nt_key] ) )
+ continue;
+ $seen_dbtitles[$nt_key] = 1;
+
if( $nt->getInterwiki() != "" ) {
# Interwiki links are not stored in the link tables
continue;
$nt->mArticleID = 0; // assume broken link until proven otherwise
$pos = array_push($titles_needing_curdata, $nt) - 1;
- $titles_needing_curdata_pos[$nt->getDBkey()] = $pos;
+ $titles_needing_curdata_pos[$nt->getDBkey() . $nt->getNamespace()] = $pos;
$links_corresponding_to_titles[] = $link;
unset( $link ); // useless outside this loop, but tempting
}
$parts = array();
foreach ($titles_needing_curdata as $nt ) {
$parts[] = " (cur_namespace = " . $nt->getNamespace() . " AND " .
- "cur_title='" . wfStrencode( $nt->getDBkey() ) . "' AND ".
- "cur_namespace=" . intval( $nt->getNamespace() ) . ")";
+ "cur_title='" . wfStrencode( $nt->getDBkey() ) . "')";
}
- $sql = "SELECT cur_title, cur_id FROM cur WHERE " . implode(" OR ", $parts);
+ $sql = "SELECT cur_namespace, cur_title, cur_id FROM cur_fast WHERE " .
+ implode(" OR ", $parts);
$res = wfQuery( $sql, DB_WRITE );
while($row = wfFetchObject( $res ) ){
- $pos = $titles_needing_curdata_pos[$row->cur_title];
+ $pos = $titles_needing_curdata_pos[$row->cur_title . $row->cur_namespace];
$titles_needing_curdata[$pos]->mArticleID = intval($row->cur_id);
}
for( $k = 0; $k < count( $titles_needing_curdata ) ; $k++) {
}
foreach ( $titles_ready_for_insertion as $nt ) {
- $dest = addslashes( $nt->getPrefixedDBkey() );
+ $dest_noslashes = $nt->getPrefixedDBkey();
+ $dest = addslashes( $dest_noslashes );
$dest_id = $nt->getArticleID();
$from = $from_full_title_with_slashes;
# print "\nLINK '$from_full_title' ($from_id) -> '$dest' ($dest_id)\n";
- if ( 0 == strncmp( "$ins:", $from_full_title, $inslen ) ) {
- $iname = addslashes( substr( $from_full_title, $inslen ) );
+
+ if ( 0 == strncmp( "$ins:", $dest_noslashes, $inslen ) ) {
+ $iname = addslashes( substr( $dest_noslashes, $inslen ) );
$imagelinks_inserter->insert( "('{$from}','{$iname}')" );
} else if ( 0 == $dest_id ) {
$brokenlinks_inserter->insert( "({$from_id},'{$dest}')" );