Fix to improve speed on some systems
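
This change speeds up the per-page link lookups in two ways: it copies the three
columns those lookups need (cur_namespace, cur_title, cur_id) out of the full cur
table into an indexed temporary table, cur_fast, and it deduplicates links per
page by normalized DB key plus namespace, so each target is resolved and queued
only once and same-named titles in different namespaces no longer collide. The
$wgUseMemoryTables setting and its global declaration are dropped from this
script.

A minimal sketch of the temporary-table setup, using the wfQuery() helper this
file already uses throughout (error handling omitted, as in the rest of the
script):

    # Copy only the columns the link lookups need into a session-local temporary
    # table and index it, so the batched SELECTs later in the loop hit a narrow
    # (namespace, title) index instead of the full-width cur rows.
    wfQuery( "CREATE TEMPORARY TABLE cur_fast" .
             " SELECT cur_namespace, cur_title, cur_id FROM cur", DB_WRITE );
    wfQuery( "ALTER TABLE cur_fast ADD INDEX(cur_namespace, cur_title)", DB_WRITE );
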
diff --git a/maintenance/rebuildlinks.inc b/maintenance/rebuildlinks.inc
index 6259182..1d38469 100644
--- a/maintenance/rebuildlinks.inc
+++ b/maintenance/rebuildlinks.inc
@@ -5,9 +5,6 @@
 # See rebuildlinks.php, for example.
 #
 
-# Turn this on if you've got memory to burn
-$wgUseMemoryTables = false;
-
 # Buffer this many rows before inserting them all in one sweep. More
 # than about 1000 will probably not increase speed significantly on
 # most setups.
@@ -16,7 +13,7 @@ $wgUseMemoryTables = false;
 function rebuildLinkTables()
 {
        error_reporting (E_ALL);
-       global $wgLang, $wgUseMemoryTables, $wgLinkCache, $rowbuf_size;
+       global $wgLang, $wgLinkCache, $rowbuf_size;
 
        print "This script may take several hours to complete. If you abort during that time,\n";
        print "your wiki will be in an inconsistent state. If you are going to abort, this is\n";
@@ -30,11 +27,18 @@ function rebuildLinkTables()
        print "Setting AUTOCOMMIT=1\n";
        wfQuery("SET SESSION AUTOCOMMIT=1", DB_WRITE);
 
+       print "Extracting often used data from cur (may take a few minutes)\n";
+       $sql = "CREATE TEMPORARY TABLE cur_fast SELECT cur_namespace, cur_title, cur_id FROM cur";
+       wfQuery( $sql, DB_WRITE );
+       $sql = "ALTER TABLE cur_fast ADD INDEX(cur_namespace, cur_title)";
+       wfQuery( $sql, DB_WRITE );
+
        print "Locking tables\n";
-       $sql = "LOCK TABLES cur READ, interwiki READ, user_newtalk READ, " .
+       $sql = "LOCK TABLES cur READ, cur_fast READ, interwiki READ, user_newtalk READ, " .
                "links WRITE, brokenlinks WRITE, imagelinks WRITE";
        wfQuery( $sql, DB_WRITE );
 
+
        print "Deleting old data in links table.\n";
        $sql = "DELETE FROM links";
        wfQuery( $sql, DB_WRITE );
@@ -97,7 +101,7 @@ function rebuildLinkTables()
                $numlinks = preg_match_all( "/\\[\\[([{$tc}]+)(]|\\|)/", $text,
                  $m, PREG_PATTERN_ORDER );
 
-               $seen_links = array(); // seen links in this article
+               $seen_dbtitles = array(); // links already seen on this page, keyed by DB key plus namespace (see below)
                $titles_ready_for_insertion = array();
                $titles_needing_curdata = array();
                $titles_needing_curdata_pos = array();
@@ -105,12 +109,6 @@ function rebuildLinkTables()
 
                for ( $i = 0 ; $i < $numlinks; ++$i ) {
                        $link = $m[1][$i];
-
-                       // We're only interested in the link once per article
-                       if( isset( $seen_links[$link] ) )
-                               continue;
-                       $seen_links[$link] = 1;
-
                        if( preg_match( '/^(http|https|ftp|mailto|news):/', $m[1][$i] ) ) {
                                # an URL link; not for us!
                                continue;
@@ -119,13 +117,26 @@ function rebuildLinkTables()
                        # FIXME: Handle subpage links
                        $nt = $titleCache->get( $link );
                        if( $nt != false ){
-                               $titles_ready_for_insertion[] = $nt;                            
+                               // Only process each unique link once per page
+                               $nt_key = $nt->getDBkey() . $nt->getNamespace();
+                               if( isset( $seen_dbtitles[$nt_key] ) )
+                                       continue;
+                               $seen_dbtitles[$nt_key] = 1;
+
+                               $titles_ready_for_insertion[] = $nt;
                        } else {
                                $nt = Title::newFromText( $link );
                                if (! $nt) {
-                                       print "\nerror in '$ns:{$from_full_title}': '$link'\n";
+                                       // Invalid link, probably something like "[[  ]]"
                                        continue;
                                }
+                               
+                               // Only process each unique link once per page
+                               $nt_key = $nt->getDBkey() . $nt->getNamespace();
+                               if( isset( $seen_dbtitles[$nt_key] ) )
+                                       continue;
+                               $seen_dbtitles[$nt_key] = 1;
+
                                if( $nt->getInterwiki() != "" ) {
                                        # Interwiki links are not stored in the link tables
                                        continue;
@@ -141,7 +152,7 @@ function rebuildLinkTables()
                                $nt->mArticleID = 0; // assume broken link until proven otherwise
 
                                $pos = array_push($titles_needing_curdata, $nt) - 1;
-                               $titles_needing_curdata_pos[$nt->getDBkey()] = $pos;
+                               $titles_needing_curdata_pos[$nt->getDBkey() . $nt->getNamespace()] = $pos;
                                $links_corresponding_to_titles[] = $link;
                                unset( $link ); // useless outside this loop, but tempting 
                        }
@@ -152,13 +163,13 @@ function rebuildLinkTables()
                        $parts = array();
                        foreach ($titles_needing_curdata as $nt ) {
                                $parts[] = " (cur_namespace = " . $nt->getNamespace() . " AND " .
-                                       "cur_title='" . wfStrencode( $nt->getDBkey() ) . "' AND ".
-                                       "cur_namespace=" . intval( $nt->getNamespace() ) . ")";
+                                       "cur_title='" . wfStrencode( $nt->getDBkey() ) . "')";
                        }
-                       $sql = "SELECT cur_title, cur_id FROM cur WHERE " . implode(" OR ", $parts);
+                       $sql = "SELECT cur_namespace, cur_title, cur_id FROM cur_fast WHERE " . 
+                               implode(" OR ", $parts);
                        $res = wfQuery( $sql, DB_WRITE );
                        while($row = wfFetchObject( $res ) ){
-                               $pos = $titles_needing_curdata_pos[$row->cur_title];
+                               $pos = $titles_needing_curdata_pos[$row->cur_title . $row->cur_namespace];
                                $titles_needing_curdata[$pos]->mArticleID = intval($row->cur_id);
                        }
                        for( $k = 0; $k < count( $titles_needing_curdata ) ; $k++) {
@@ -169,13 +180,15 @@ function rebuildLinkTables()
                }
 
                foreach ( $titles_ready_for_insertion as $nt ) {
-                       $dest = addslashes( $nt->getPrefixedDBkey() );
+                       $dest_noslashes = $nt->getPrefixedDBkey();
+                       $dest = addslashes( $dest_noslashes ); 
                        $dest_id = $nt->getArticleID();
                        $from = $from_full_title_with_slashes;
 
                        # print "\nLINK '$from_full_title' ($from_id) -> '$dest' ($dest_id)\n";
-                       if ( 0 == strncmp( "$ins:", $from_full_title, $inslen ) ) { 
-                               $iname = addslashes( substr( $from_full_title, $inslen ) );
+
+                       if ( 0 == strncmp( "$ins:", $dest_noslashes, $inslen ) ) { 
+                               $iname = addslashes( substr( $dest_noslashes, $inslen ) );
                                $imagelinks_inserter->insert( "('{$from}','{$iname}')" );
                        } else if ( 0 == $dest_id ) {
                                $brokenlinks_inserter->insert( "({$from_id},'{$dest}')" );
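
For reference, the batched ID lookup that the later hunks rework can be condensed
into a hypothetical helper along these lines. It is not part of the patch; it
reuses the wfQuery(), wfFetchObject() and wfStrencode() helpers and the Title
accessors seen above, and keys results by cur_title plus cur_namespace exactly as
the patch does:

    # Hypothetical helper: resolve article IDs for a batch of Title objects with
    # a single SELECT against the cur_fast temporary table. Titles that are not
    # found keep mArticleID = 0, i.e. they are treated as broken links.
    function resolveArticleIds( &$titles ) {
        if ( count( $titles ) == 0 ) {
            return;
        }
        $parts = array();
        $pos_by_key = array();
        for ( $i = 0; $i < count( $titles ); $i++ ) {
            $nt = $titles[$i];
            $parts[] = "(cur_namespace=" . intval( $nt->getNamespace() ) .
                " AND cur_title='" . wfStrencode( $nt->getDBkey() ) . "')";
            # Key by DB key plus namespace, matching the result loop below.
            $pos_by_key[$nt->getDBkey() . $nt->getNamespace()] = $i;
            $titles[$i]->mArticleID = 0; # assume broken until proven otherwise
        }
        $sql = "SELECT cur_namespace, cur_title, cur_id FROM cur_fast WHERE " .
            implode( " OR ", $parts );
        $res = wfQuery( $sql, DB_WRITE );
        while ( $row = wfFetchObject( $res ) ) {
            $pos = $pos_by_key[$row->cur_title . $row->cur_namespace];
            $titles[$pos]->mArticleID = intval( $row->cur_id );
        }
    }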