no need for position:absolute
[lhc/web/wiklou.git] / maintenance / convertUtf8.php
1 <?php
2 /**
3 * @package MediaWiki
4 * @subpackage Maintenance
5 */
6
7 die("This file is not complete; it's checked in so I don't forget it."); // Deliberate guard: terminates the script here so the unfinished conversion code below never runs.
8
9 /**
10 * UTF-8 conversion of DOOOOOOOM
11 *
12 * 1. Lock the wiki
13 * 2. Make a convertlist of all pages
14 * 3. Enable CONVERTLOCK mode and switch to UTF-8
15 * 4. As quickly as possible, convert the cur, images, *links, user, etc. tables.
16 * Clear cache tables.
17 * 5. Unlock the wiki. Attempts to access pages on the convertlist will be
18 * trapped to read-only.
19 * 6. Go through the list, fixing up old revisions. Remove pages from the
20 * convertlist.
21 */
22
23 /**
24 * @package MediaWiki
25 * @subpackage Maintenance
26 */
class UtfUpdater {
	/** Database handle used for every query issued by this class. */
	var $db;

	/**
	 * Constructor, set the database.
	 *
	 * PHP 8 no longer treats a method named after its class as a
	 * constructor, so the initialisation lives here and the legacy
	 * PHP 4 style constructor below simply delegates to it.
	 */
	function __construct() {
		$this->db =& wfGetDB( DB_MASTER );
	}

	/** PHP 4 style constructor, kept so existing callers keep working. */
	function UtfUpdater() {
		$this->__construct();
	}

	/**
	 * Convert a legacy 8-bit string to UTF-8.
	 *
	 * @param string $string A string to be converted to UTF-8
	 * @return string The converted string
	 */
	function toUtf8( $string ) {
		if( function_exists( 'iconv' ) ) {
			# There are likely to be Windows code page 1252 chars in there.
			# Convert them to the proper UTF-8 chars if possible.
			$converted = iconv( 'CP1252', 'UTF-8', $string );
			if( $converted !== false ) {
				return $converted;
			}
			# iconv() returns false when it cannot convert the input;
			# fall through rather than handing false back to be stored
			# in place of the original data.
		}
		# Will work from plain iso 8859-1 and may corrupt these chars
		return utf8_encode( $string );
	}

	/**
	 * Truncate a table.
	 * @param string $table The table name to be truncated
	 */
	function clearTable( $table ) {
		print "Clearing $table...\n";
		$tableName = $this->db->tableName( $table );
		# Double quotes are required here so that $tableName is interpolated.
		$this->db->query( "TRUNCATE $tableName" );
	}

	/**
	 * Convert every row of a table that contains high-bit bytes in one of
	 * the given fields, writing the UTF-8 version back with an UPDATE.
	 *
	 * At least one of $key and $fields must be given.
	 *
	 * @param string $table Table to be converted
	 * @param string $key Primary key, to identify fields in the UPDATE. If NULL, all fields will be used to match.
	 * @param array $fields List of all fields to grab and convert. If null, will assume you want the $key, and will ask for DISTINCT.
	 * @param string $timestamp A field which should be updated to the current timestamp on changed records.
	 * @param callable $callback Called with each fetched (pre-conversion) row after it has been updated.
	 * @access private
	 */
	function convertTable( $table, $key, $fields = null, $timestamp = null, $callback = null ) {
		$fname = 'UtfUpdater::convertTable';
		if( $fields ) {
			$distinct = '';
		} else {
			# If working on one key only, there will be multiple rows.
			# Use DISTINCT to return only one and save us some trouble.
			$fields = array( $key );
			$distinct = 'DISTINCT';
		}

		# Only rows with at least one byte >= 0x80 in a converted field
		# need any work; the escapes below expand to the raw bytes.
		$condition = '';
		foreach( $fields as $field ) {
			if( $condition ) $condition .= ' OR ';
			$condition .= "$field RLIKE '[\x80-\xff]'";
		}

		# Fetch the key as well, but never a null column and never the
		# same column twice.
		$columns = $fields;
		if( $key !== null && !in_array( $key, $columns, true ) ) {
			$columns[] = $key;
		}

		$res = $this->db->selectArray(
			$table,
			$columns,
			$condition,
			$fname,
			$distinct );
		print "Converting " . $this->db->numResults( $res ) . " rows from $table:\n";
		$n = 0;
		while( $s = $this->db->fetchObject( $res ) ) {
			# Map each column name to its converted value.
			$set = array();
			foreach( $fields as $field ) {
				$set[$field] = $this->toUtf8( $s->$field );
			}
			if( $timestamp ) {
				$set[$timestamp] = $this->db->timestamp();
			}

			# Match on the key when there is one, otherwise on the old
			# values of every converted field.
			if( $key ) {
				$keyCond = array( $key => $s->$key );
			} else {
				$keyCond = array();
				foreach( $fields as $field ) {
					$keyCond[$field] = $s->$field;
				}
			}
			$this->db->updateArray(
				$table,
				$set,
				$keyCond,
				$fname );
			if( ++$n % 100 == 0 ) echo "$n\n";

			if( is_callable( $callback ) ) {
				call_user_func( $callback, $s );
			}
		}
		echo "$n done.\n";
		$this->db->freeResult( $res );
	}

	/**
	 * Rename the file belonging to a converted image row.
	 *
	 * @param object $row
	 * @access private
	 */
	function imageRenameCallback( $row ) {
		$this->renameFile( $row->img_name, 'wfImageDir' );
	}

	/**
	 * Rename the file belonging to a converted oldimage row.
	 *
	 * @param object $row
	 * @access private
	 */
	function oldimageRenameCallback( $row ) {
		$this->renameFile( $row->oi_archive_name, 'wfImageArchiveDir' );
	}

	/**
	 * Rename a given image or archived image file to the converted filename,
	 * leaving a symlink for URL compatibility.
	 *
	 * @param string $oldname pre-conversion filename
	 * @param callable $subdirCallback a function to generate hashed directories
	 * @access private
	 */
	function renameFile( $oldname, $subdirCallback ) {
		$newname = $this->toUtf8( $oldname );
		if( $newname === $oldname ) {
			// No need to rename; another field triggered this row.
			return;
		}

		$oldpath = call_user_func( $subdirCallback, $oldname ) . '/' . $oldname;
		$newpath = call_user_func( $subdirCallback, $newname ) . '/' . $newname;

		echo "Renaming $oldpath to $newpath... ";
		if( rename( $oldpath, $newpath ) ) {
			echo "ok\n";
			echo "Creating compatibility symlink from $newpath to $oldpath... ";
			if( symlink( $newpath, $oldpath ) ) {
				echo "ok\n";
			} else {
				echo " symlink failed!\n";
			}
		} else {
			echo " rename failed!\n";
		}
	}

	/**
	 * Lock tables for writing.
	 * @param array $tables An array of tables to be locked.
	 */
	function lockTables( $tables ) {
		$locks = array();
		foreach( $tables as $table ) {
			$locks[] = $this->db->tableName( $table ) . ' WRITE';
		}
		if( !$locks ) {
			# Nothing to lock; a bare "LOCK TABLES" is not a valid statement.
			return;
		}
		$this->db->query( 'LOCK TABLES ' . implode( ', ', $locks ) );
	}

	/**
	 * Run the whole conversion: lock the affected tables, clear the cache
	 * tables, convert the text fields of the remaining tables (renaming
	 * image files as their rows are converted), then unlock the tables.
	 */
	function updateAll() {
		# Every table touched below has to be named here, since a session
		# holding table locks can only access the tables it has locked.
		$this->lockTables( array(
			'objectcache', 'searchindex', 'querycache',
			'ipblocks', 'user', 'page', 'revision', 'recentchanges',
			'pagelinks', 'categorylinks', 'imagelinks', 'watchlist',
			'image', 'oldimage', 'archive' ) );

		# These are safe to clear out:
		$this->clearTable( 'objectcache' );

		# These need to be rebuilt if used:
		$this->clearTable( 'searchindex' );
		$this->clearTable( 'querycache' );

		# And convert the rest...
		$this->convertTable( 'ipblocks', 'ipb_id', array( 'ipb_reason' ) );
		$this->convertTable( 'user', 'user_id',
			array( 'user_name', 'user_real_name', 'user_options' ),
			'user_touched' );
		$this->convertTable( 'page', 'page_id',
			array( 'page_title' ), 'page_touched' );
		$this->convertTable( 'revision', 'rev_id',
			array( 'rev_user_text', 'rev_comment' ) );

		$this->convertTable( 'recentchanges', 'rc_id',
			array( 'rc_user_text', 'rc_title', 'rc_comment' ) );

		$this->convertTable( 'pagelinks', 'pl_title' );
		$this->convertTable( 'categorylinks', 'cl_to' );
		$this->convertTable( 'imagelinks', 'il_to' );
		$this->convertTable( 'watchlist', 'wl_title' );

		# We'll also need to change the files.
		$this->convertTable( 'image', 'img_name',
			array( 'img_name', 'img_description', 'img_user_text' ),
			null,
			array( &$this, 'imageRenameCallback' ) );
		$this->convertTable( 'oldimage', 'oi_archive_name',
			array( 'oi_name', 'oi_archive_name', 'oi_description', 'oi_user_text' ),
			null,
			array( &$this, 'oldimageRenameCallback' ) );

		# Don't change the ar_text entries; use $wgLegacyEncoding to read them at runtime
		$this->convertTable( 'archive', null,
			array( 'ar_title', 'ar_comment', 'ar_user_text' ) );
		echo "Not converting text table: be sure to set \$wgLegacyEncoding!\n";

		$this->db->query( 'UNLOCK TABLES' );
	}

}
235 ?>