* Missing items
[lhc/web/wiklou.git] / maintenance / postgres / wp_mysql2postgres.pl
1 #!/usr/bin/perl
2
3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
4 ## svn: $Id$
5
6 use strict;
7 use warnings;
8 use Data::Dumper;
9 use Getopt::Long;
10
11 use vars qw(%table %tz %special @torder $COM);
12 my $VERSION = "1.0";
13
14 ## The following options can be changed via command line arguments:
15 my $MYSQLDB = 'wikidb';
16 my $MYSQLUSER = 'wikiuser';
17
18 ## If the following are zero-length, we omit their arguments entirely:
19 my $MYSQLHOST = '';
20 my $MYSQLPASSWORD = '';
21 my $MYSQLSOCKET = '';
22
23 ## Name of the dump file created
24 my $MYSQLDUMPFILE = "mediawiki_upgrade.pg";
25
26 ## How verbose should this script be (0, 1, or 2)
27 my $verbose = 0;
28
29 my $USAGE = "
30 Usage: $0 [OPTION]...
31 Convert a MediaWiki schema from MySQL to Postgres
32 Example: $0 --db=wikidb --user=wikiuser --pass=sushi
33 Options:
34 db Name of the MySQL database
35 user MySQL database username
36 pass MySQL database password
37 host MySQL database host
38 socket MySQL database socket
39 verbose Verbosity, increases with multiple uses
40 ";
41
42 GetOptions
43 (
44 "db=s" => \$MYSQLDB,
45 "user=s" => \$MYSQLUSER,
46 "pass=s" => \$MYSQLPASSWORD,
47 "host=s" => \$MYSQLHOST,
48 "socket=s" => \$MYSQLSOCKET,
49 "verbose+" => \$verbose
50 );
51
52 ## The Postgres schema file: should not be changed
53 my $PG_SCHEMA = "tables.sql";
54
55 ## What version we default to when we can't parse the old schema
56 my $MW_DEFAULT_VERSION = '1.8';
57
58 ## Try and find a working version of mysqldump
59 $verbose and warn "Locating the mysqldump executable\n";
60 my @MYSQLDUMP = ("/usr/local/bin/mysqldump", "/usr/bin/mysqldump");
61 my $MYSQLDUMP;
62 for my $mytry (@MYSQLDUMP) {
63 next if ! -e $mytry;
64 -x $mytry or die qq{Not an executable file: "$mytry"\n};
65 my $version = qx{$mytry -V};
66 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
67 $MYSQLDUMP = $mytry;
68 }
69 $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
70
71 ## Flags we use for mysqldump
72 my @MYSQLDUMPARGS = qw(
73 --skip-lock-tables
74 --complete-insert
75 --skip-extended-insert
76 --skip-add-drop-table
77 --skip-add-locks
78 --skip-disable-keys
79 --skip-set-charset
80 --skip-comments
81 --skip-quote-names
82 );
83
84
85 $verbose and warn "Checking that mysqldump can handle our flags\n";
86 ## Make sure this version can handle all the flags we want.
87 ## Combine with user dump below
88 my $MYSQLDUMPARGS = join " " => @MYSQLDUMPARGS;
89 ## Argh. Any way to make this work on Win32?
90 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
91 if ($version =~ /unknown option/) {
92 die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
93 }
94
95 push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
96 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
97 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
98
99 ## Open the dump file to hold the mysqldump output
100 open my $mdump, "+>", $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
101 $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
102
103 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, "--no-data", $MYSQLDB;
104 my $oldselect = select $mdump;
105
106 print while <$mfork2>;
107
108 ## Slurp in the current schema
109 my $current_schema;
110 seek $mdump, 0, 0;
111 {
112 local $/;
113 $current_schema = <$mdump>;
114 }
115 seek $mdump, 0, 0;
116 truncate $mdump, 0;
117
118 warn qq{Trying to determine database version...\n} if $verbose;
119
120 my $current_version = 0;
121 if ($current_schema =~ /CREATE TABLE \S+cur /) {
122 $current_version = '1.3';
123 }
124 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
125 $current_version = '1.4';
126 }
127 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
128 $current_version = '1.5';
129 }
130 elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
131 $current_version = '1.6';
132 }
133 elsif ($current_schema !~ /ipb_auto tinyint/) {
134 $current_version = '1.7';
135 }
136 else {
137 $current_version = '1.8';
138 }
139
140 if (!$current_version) {
141 warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
142 $current_version = $MW_DEFAULT_VERSION;
143 }
144
145 ## Check for a table prefix:
146 my $table_prefix = '';
147 if ($current_version =~ /CREATE TABLE (\S+)archive /) {
148 $table_prefix = $1;
149 }
150
151 warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
152 warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;
153
154 $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
155 my $now = scalar localtime();
156 my $conninfo = '';
157 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
158 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
159
160 print qq{
161 -- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
162 -- Performed by the program: $0
163 -- Version: $VERSION (subversion }.q{$LastChangedRevision$}.qq{)
164 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
165 --
166 -- This file was created: $now
167 -- Executable used: $MYSQLDUMP
168 -- Connection information:
169 -- database: $MYSQLDB
170 -- user: $MYSQLUSER$conninfo
171
172 -- This file can be imported manually with psql like so:
173 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
174 -- This will overwrite any existing MediaWiki information, so be careful
175
176
177 };
178
179 warn qq{Reading in the Postgres schema information\n} if $verbose;
180 open my $schema, "<", $PG_SCHEMA
181 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
182 my $t;
183 while (<$schema>) {
184 if (/CREATE TABLE\s+(\S+)/) {
185 $t = $1;
186 $table{$t}={};
187 }
188 elsif (/^ +(\w+)\s+TIMESTAMP/) {
189 $tz{$t}{$1}++;
190 }
191 elsif (/REFERENCES\s*([^( ]+)/) {
192 my $ref = $1;
193 exists $table{$ref} or die qq{No parent table $ref found for $t\n};
194 $table{$t}{$ref}++;
195 }
196 }
197 close $schema;
198
199 ## Read in special cases and table/version information
200 $verbose and warn qq{Reading in schema exception information\n};
201 my %version_tables;
202 while (<DATA>) {
203 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
204 my $list = join '|' => split /\s+/ => $2;
205 $version_tables{$1} = qr{\b$list\b};
206 next;
207 }
208 next unless /^(\w+)\s*(.*)/;
209 $special{$1} = $2||'';
210 $special{$2} = $1 if length $2;
211 }
212
213 ## Determine the order of tables based on foreign key constraints
214 $verbose and warn qq{Figuring out order of tables to dump\n};
215 my %dumped;
216 my $bail = 0;
217 {
218 my $found=0;
219 T: for my $t (sort keys %table) {
220 next if exists $dumped{$t} and $dumped{$t} >= 1;
221 $found=1;
222 for my $dep (sort keys %{$table{$t}}) {
223 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
224 }
225 $dumped{$t} = -1 if ! exists $dumped{$t};
226 ## Skip certain tables that are not imported
227 next if exists $special{$t} and !$special{$t};
228 push @torder, $special{$t} || $t;
229 }
230 last if !$found;
231 push @torder, "---";
232 for (values %dumped) { $_+=2; }
233 die "Too many loops!\n" if $bail++ > 1000;
234 redo;
235 }
236
237 ## Prepare the Postgres database for the move
238 $verbose and warn qq{Writing Postgres transformation information\n};
239
240 print "\n-- Empty out all existing tables\n";
241 $verbose and warn qq{Writing truncates to empty existing tables\n};
242 for my $t (@torder) {
243 next if $t eq '---';
244 my $tname = $special{$t}||$t;
245 printf qq{TRUNCATE TABLE %-18s CASCADE;\n}, qq{"$tname"};
246 }
247 print "\n\n";
248
249 print qq{-- Rename the "text" table\n};
250 print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n};
251
252 print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
253 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
254
255 print "-- Changing all timestamp fields to handle raw integers\n";
256 for my $t (sort keys %tz) {
257 next if $t eq "archive2";
258 for my $c (sort keys %{$tz{$t}}) {
259 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
260 }
261 }
262 print "\n";
263
264 print qq{
265 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
266 };
267
268 ## If we have a table _prefix, we need to temporarily rename all of our Postgres
269 ## tables temporarily for the import. Perhaps consider making this an auto-schema
270 ## thing in the future.
271 if (length $table_prefix) {
272 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
273 for my $t (@torder) {
274 next if $t eq '---';
275 my $tname = $special{$t}||$t;
276 printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"};
277 }
278 }
279
280
281 ## Try and dump the ill-named "user" table:
282 ## We do this table alone because "user" is a reserved word.
283 print qq{
284
285 SET escape_string_warning TO 'off';
286 \\o /dev/null
287
288 -- Postgres uses a table name of "mwuser" instead of "user"
289
290 -- Create a dummy user to satisfy fk contraints especially with revisions
291 SELECT setval('user_user_id_seq',0,'false');
292 INSERT INTO mwuser
293 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
294
295 };
296
297 push @MYSQLDUMPARGS, "--no-create-info";
298
299 $verbose and warn qq{Dumping "user" table\n};
300 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
301 my $usertable = "${table_prefix}user";
302 open my $mfork, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
303 ## Unfortunately, there is no easy way to catch errors
304 my $numusers = 0;
305 while (<$mfork>) {
306 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
307 }
308 close $mfork;
309 if ($numusers < 1) {
310 warn qq{No users found, probably a connection error.\n};
311 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
312 close $mdump;
313 exit;
314 }
315 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
316
317 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
318
319 ## Dump the rest of the tables, in chunks based on constraints
320 ## We do not need the user table:
321 my @dumplist = grep { $_ ne 'user'} @torder;
322 my @alist;
323 {
324 undef @alist;
325 PICKATABLE: {
326 my $tname = shift @dumplist;
327 ## XXX Make this dynamic below
328 for my $ver (sort {$b <=> $a } keys %version_tables) {
329 redo PICKATABLE if $tname =~ $version_tables{$ver};
330 }
331 $tname = "${table_prefix}$tname" if length $table_prefix;
332 push @alist, $tname;
333 pop @alist and last if index($alist[-1],'---') >= 0;
334 redo if @dumplist;
335 }
336
337 ## Dump everything else
338 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
339 print while <$mfork2>;
340 close $mfork2;
341 warn qq{Finished dumping from MySQL\n} if $verbose;
342
343 redo if @dumplist;
344 }
345
346 warn qq{Writing information to return Postgres database to normal\n} if $verbose;
347 print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
348 print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
349 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
350
351 ## Return tables to their original names if a table prefix was used.
352 if (length $table_prefix) {
353 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
354 my $maxsize = 18;
355 for (@torder) {
356 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
357 }
358 for my $t (@torder) {
359 next if $t eq '---' or $t eq 'text';
360 my $tname = $special{$t}||$t;
361 printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"};
362 }
363 }
364
365 print qq{\n\n--Returning timestamps to normal\n};
366 for my $t (sort keys %tz) {
367 next if $t eq "archive2";
368 for my $c (sort keys %{$tz{$t}}) {
369 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
370 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
371 }
372 }
373
374 ## Finally, make a record in the mediawiki_version table about this import
375 print qq{
376 INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??',
377 'Imported from file created on $now. Old version: $current_version');
378 };
379
380
381 print "\\o\n\n-- End of dump\n\n";
382 select $oldselect;
383 close $mdump;
384 exit;
385
386
387 __DATA__
388 ## Known remappings: either indicate the MySQL name,
389 ## or leave blank if it should be skipped
390 pagecontent text
391 mwuser user
392 mediawiki_version
393 archive2
394 profiling
395 objectcache
396
397 ## Which tables to ignore depending on the version
398 VERSION 1.5: trackback
399 VERSION 1.6: externallinks job templatelinks transcache
400 VERSION 1.7: filearchive langlinks querycache_info