8be43362acfd4875e133f23661ec39dd7356394a
[lhc/web/wiklou.git] / maintenance / rebuildlinks.inc
<?php

# Functions for rebuilding the link tracking tables; must
# be included within a script that also includes the Setup.
# See rebuildlinks.php, for example.
#
# The full "<?php" open tag is used (rather than the short "<?")
# so this file parses regardless of the short_open_tag ini setting.

# Turn this on if you've got memory to burn.
# When true, rebuildLinkTablesPass1() appends " TYPE=heap" to the
# CREATE TABLE statement for the temporary rebuildlinks table.
$wgUseMemoryTables = false;
10
# Pass 1 of the link table rebuild.
#
# Recreates the scratch table `rebuildlinks`, then scans the text of
# every row in `cur` for [[...]] links and records one scratch row
# (source page id, source prefixed title, target prefixed DB key)
# per link found. URL-style links, interwiki links and links into
# the Special namespace are skipped; Media: links are recorded as
# Image: links. Progress is printed every 1000 articles.
#
# Takes no parameters and returns nothing; all results are written
# to the database through wfQuery().
function rebuildLinkTablesPass1()
{
	global $wgLang, $wgUseMemoryTables;
	$count = 0;
	print "Rebuilding link tables (pass 1).\n";

	# Start from a fresh scratch table.
	$sql = "DROP TABLE IF EXISTS rebuildlinks";
	wfQuery( $sql, DB_WRITE );

	$sql = "CREATE TABLE rebuildlinks (
  rl_f_id int(8) unsigned NOT NULL default 0,
  rl_f_title varchar(255) binary NOT NULL default '',
  rl_to varchar(255) binary NOT NULL default '',
  INDEX rl_to (rl_to) )";
	if( $wgUseMemoryTables ) $sql .= " TYPE=heap";
	wfQuery( $sql, DB_WRITE );

	$sql = "LOCK TABLES cur READ, rebuildlinks WRITE, interwiki READ, user_newtalk READ";
	wfQuery( $sql, DB_WRITE );

	# Redundant right after CREATE TABLE, but harmless; kept so the
	# sequence of statements sent to the server is unchanged.
	$sql = "DELETE FROM rebuildlinks";
	wfQuery( $sql, DB_WRITE );

	$sql = "SELECT cur_id,cur_namespace,cur_title,cur_text FROM cur";
	$res = wfQuery( $sql, DB_WRITE );
	$total = wfNumRows( $res );

	$tc = Title::legalChars();
	while ( $row = wfFetchObject( $res ) ) {
		$id = $row->cur_id;
		$ns = $wgLang->getNsText( $row->cur_namespace );
		if ( "" == $ns ) {
			$title = addslashes( $row->cur_title );
		} else {
			$title = addslashes( "$ns:{$row->cur_title}" );
		}
		$text = $row->cur_text;

		# $m[1] receives the link target of every "[[target]" or
		# "[[target|" occurrence in the page text.
		$numlinks = preg_match_all( "/\\[\\[([{$tc}]+)(]|\\|)/", $text,
		  $m, PREG_PATTERN_ORDER );

		if ( 0 != $numlinks ) {
			# Collect one "(id,'from','to')" tuple per storable link and
			# send them to the server as a single multi-row INSERT.
			$values = array();
			for ( $i = 0; $i < $numlinks; ++$i ) {
				if( preg_match( '/^(http|https|ftp|mailto|news):/', $m[1][$i] ) ) {
					# an URL link; not for us!
					continue;
				}
				# FIXME: Handle subpage links
				$nt = Title::newFromText( $m[1][$i] );
				if (! $nt)
				{
					$txt = $m[1][$i];
					print "error in '$ns:{$row->cur_title}' :\t'$txt'\n";
					continue;
				}
				if( $nt->getInterwiki() != "" ) {
					# Interwiki links are not stored in the link tables
					continue;
				}
				if( $nt->getNamespace() == Namespace::getSpecial() ) {
					# Special links not stored in link tables
					continue;
				}
				if( $nt->getNamespace() == Namespace::getMedia() ) {
					# treat media: links as image: links
					$nt = Title::makeTitle( Namespace::getImage(), $nt->getDBkey() );
				}

				$dest = addslashes( $nt->getPrefixedDBkey() );
				$values[] = "({$id},'{$title}','{$dest}')";
			}

			# Every match may have been skipped above; only query when
			# there is at least one tuple to insert.
			if ( count( $values ) > 0 ) {
				$sql = "INSERT INTO rebuildlinks (rl_f_id,rl_f_title,rl_to) VALUES " .
				  implode( ",", $values );
				wfQuery( $sql, DB_WRITE );
			}
		}
		if ( ( ++$count % 1000 ) == 0 ) {
			print "$count of $total articles scanned.\n";
		}
	}
	print "$total articles scanned.\n";

	# Release the result through the same wrapper pass 2 uses rather
	# than calling the raw mysql_* function directly.
	wfFreeResult( $res );

	$sql = "UNLOCK TABLES";
	wfQuery( $sql, DB_WRITE );
}
101
# Pass 2 of the link table rebuild.
#
# Empties `links`, `brokenlinks` and `imagelinks` and refills them
# from the scratch table `rebuildlinks` written by pass 1:
#   - targets whose key starts with the Image namespace prefix go
#     to `imagelinks` (source title, image name without prefix);
#   - every other target is looked up via Title::getArticleID();
#     an ID of 0 is treated as "no such article" and the link goes
#     to `brokenlinks` (source id, target key), otherwise it goes
#     to `links` (source title, target article id).
# The scratch table is dropped at the end. Progress is printed
# every 1000 distinct targets.
#
# Takes no parameters and returns nothing; all results are written
# to the database through wfQuery().
function rebuildLinkTablesPass2()
{
	global $wgLang;
	print "Rebuilding link tables (pass 2).\n";

	$sql = "LOCK TABLES cur READ, rebuildlinks READ, interwiki READ, " .
	  "links WRITE, brokenlinks WRITE, imagelinks WRITE";
	wfQuery( $sql, DB_WRITE );

	$sql = "DELETE FROM links";
	wfQuery( $sql, DB_WRITE );

	$sql = "DELETE FROM brokenlinks";
	wfQuery( $sql, DB_WRITE );

	$sql = "DELETE FROM imagelinks";
	wfQuery( $sql, DB_WRITE );

	# --- Image links -------------------------------------------------
	$ins = $wgLang->getNsText( Namespace::getImage() );
	$inslen = strlen($ins)+1;  # length of "<namespace>:" prefix

	# DISTINCT, like the links/brokenlinks queries below: a page that
	# uses the same image several times has several identical rows in
	# rebuildlinks and must yield only one imagelinks row.
	$sql = "SELECT DISTINCT rl_f_title,rl_to FROM rebuildlinks " .
	  "WHERE rl_to LIKE '$ins:%'";
	$res = wfQuery( $sql, DB_WRITE );

	$values = array();
	while ( $row = wfFetchObject( $res ) )
	{
		# Strip the "<namespace>:" prefix to get the bare image name.
		$iname = addslashes( substr( $row->rl_to, $inslen ) );
		$pname = addslashes( $row->rl_f_title );
		$values[] = "('{$pname}','{$iname}')";
	}
	wfFreeResult( $res );
	if ( count( $values ) > 0 ) {
		$sql = "INSERT INTO imagelinks (il_from,il_to) VALUES " .
		  implode( ",", $values );
		wfQuery( $sql, DB_WRITE );
	}

	# --- Ordinary and broken links -----------------------------------
	$sql = "SELECT DISTINCT rl_to FROM rebuildlinks ORDER BY rl_to";
	$res = wfQuery( $sql, DB_WRITE );
	$count = 0;
	$total = wfNumRows( $res );

	while ( $row = wfFetchObject( $res ) ) {
		# Image targets were already handled above.
		if ( 0 == strncmp( "$ins:", $row->rl_to, $inslen ) ) { continue; }

		$nt = Title::newFromDBkey( $row->rl_to );
		if (! $nt)
		{
			print "error pass2: '{$row->rl_to}'\n";
			continue;
		}
		$id = $nt->getArticleID();
		$to = addslashes( $row->rl_to );

		if ( 0 == $id ) {
			# Target is treated as nonexistent: record broken links,
			# keyed by the id of each linking page.
			$sql = "SELECT DISTINCT rl_f_id FROM rebuildlinks WHERE rl_to='{$to}'";
			$res2 = wfQuery( $sql, DB_WRITE );

			$values = array();
			while ( $row2 = wfFetchObject( $res2 ) )
			{
				$from = $row2->rl_f_id;
				$values[] = "({$from},'{$to}')";
			}
			wfFreeResult( $res2 );
			if ( count( $values ) > 0 ) {
				$sql = "INSERT INTO brokenlinks (bl_from,bl_to) VALUES " .
				  implode( ",", $values );
				wfQuery( $sql, DB_WRITE );
			}
		} else {
			# Target has an article id: record good links, keyed by
			# the title of each linking page.
			$sql = "SELECT DISTINCT rl_f_title FROM rebuildlinks WHERE rl_to='{$to}'";
			$res2 = wfQuery( $sql, DB_WRITE );

			$values = array();
			while ( $row2 = wfFetchObject( $res2 ) )
			{
				$from = addslashes( $row2->rl_f_title );
				$values[] = "('{$from}',{$id})";
			}
			wfFreeResult( $res2 );
			if ( count( $values ) > 0 ) {
				$sql = "INSERT INTO links (l_from,l_to) VALUES " .
				  implode( ",", $values );
				wfQuery( $sql, DB_WRITE );
			}
		}
		if ( ( ++$count % 1000 ) == 0 ) {
			print "$count of $total titles processed.\n";
		}
	}
	wfFreeResult( $res );

	$sql = "UNLOCK TABLES";
	wfQuery( $sql, DB_WRITE );

	# The scratch table is no longer needed once all three link
	# tables have been rebuilt.
	$sql = "DROP TABLE rebuildlinks";
	wfQuery( $sql, DB_WRITE );
}
210 ?>