*Generalize function to list UI names for all groups with a permission into OutputPag...
[lhc/web/wiklou.git] / maintenance / postgres / mediawiki_mysql2postgres.pl
1 #!/usr/bin/perl
2
3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
4 ## svn: $Id$
5
6 use strict;
7 use warnings;
8 use Data::Dumper;
9 use Getopt::Long;
10
11 use vars qw(%table %tz %special @torder $COM);
12 my $VERSION = '1.2';
13
14 ## The following options can be changed via command line arguments:
15 my $MYSQLDB = '';
16 my $MYSQLUSER = '';
17
18 ## If the following are zero-length, we omit their arguments entirely:
19 my $MYSQLHOST = '';
20 my $MYSQLPASSWORD = '';
21 my $MYSQLSOCKET = '';
22
23 ## Name of the dump file created
24 my $MYSQLDUMPFILE = 'mediawiki_upgrade.pg';
25
26 ## How verbose should this script be (0, 1, or 2)
27 my $verbose = 0;
28
29 my $help = 0;
30
31 my $USAGE = "
32 Usage: $0 --db=<dbname> --user=<user> [OPTION]...
33 Example: $0 --db=wikidb --user=wikiuser --pass=sushi
34
35 Converts a MediaWiki schema from MySQL to Postgres
36 Options:
37 db Name of the MySQL database
38 user MySQL database username
39 pass MySQL database password
40 host MySQL database host
41 socket MySQL database socket
42 verbose Verbosity, increases with multiple uses
43 ";
44
45 GetOptions
46 (
47 'db=s' => \$MYSQLDB,
48 'user=s' => \$MYSQLUSER,
49 'pass=s' => \$MYSQLPASSWORD,
50 'host=s' => \$MYSQLHOST,
51 'socket=s' => \$MYSQLSOCKET,
52 'verbose+' => \$verbose,
53 'help' => \$help,
54 );
55
56 die $USAGE
57 if ! length $MYSQLDB
58 or ! length $MYSQLUSER
59 or $help;
60
61 ## The Postgres schema file: should not be changed
62 my $PG_SCHEMA = 'tables.sql';
63
64 ## What version we default to when we can't parse the old schema
65 my $MW_DEFAULT_VERSION = 110;
66
67 ## Try and find a working version of mysqldump
68 $verbose and warn "Locating the mysqldump executable\n";
69 my @MYSQLDUMP = ('/usr/local/bin/mysqldump', '/usr/bin/mysqldump');
70 my $MYSQLDUMP;
71 for my $mytry (@MYSQLDUMP) {
72 next if ! -e $mytry;
73 -x $mytry or die qq{Not an executable file: "$mytry"\n};
74 my $version = qx{$mytry -V};
75 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
76 $MYSQLDUMP = $mytry;
77 }
78 $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
79
80 ## Flags we use for mysqldump
81 my @MYSQLDUMPARGS = qw(
82 --skip-lock-tables
83 --complete-insert
84 --skip-extended-insert
85 --skip-add-drop-table
86 --skip-add-locks
87 --skip-disable-keys
88 --skip-set-charset
89 --skip-comments
90 --skip-quote-names
91 );
92
93
94 $verbose and warn "Checking that mysqldump can handle our flags\n";
95 ## Make sure this version can handle all the flags we want.
96 ## Combine with user dump below
97 my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS;
98 ## Argh. Any way to make this work on Win32?
99 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
100 if ($version =~ /unknown option/) {
101 die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
102 }
103
104 push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
105 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
106 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
107
108 ## Open the dump file to hold the mysqldump output
109 open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
110 print qq{Writing file "$MYSQLDUMPFILE"\n};
111
112 open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB;
113 my $oldselect = select $mdump;
114
115 print while <$mfork2>;
116
117 ## Slurp in the current schema
118 my $current_schema;
119 seek $mdump, 0, 0;
120 {
121 local $/;
122 $current_schema = <$mdump>;
123 }
124 seek $mdump, 0, 0;
125 truncate $mdump, 0;
126
127 warn qq{Trying to determine database version...\n} if $verbose;
128
129 my $current_version = 0;
130 if ($current_schema =~ /CREATE TABLE \S+cur /) {
131 $current_version = 103;
132 }
133 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
134 $current_version = 104;
135 }
136 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
137 $current_version = 105;
138 }
139 elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
140 $current_version = 106;
141 }
142 elsif ($current_schema !~ /ipb_auto tinyint/) {
143 $current_version = 107;
144 }
145 elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
146 $current_version = 108;
147 }
148 elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
149 $current_version = 109;
150 }
151 else {
152 $current_version = $MW_DEFAULT_VERSION;
153 }
154
155 if (!$current_version) {
156 warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
157 $current_version = $MW_DEFAULT_VERSION;
158 }
159
160 ## Check for a table prefix:
161 my $table_prefix = '';
162 if ($current_schema =~ /CREATE TABLE (\S+)querycache /) {
163 $table_prefix = $1;
164 }
165
166 warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
167 warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;
168
169 $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
170 my $now = scalar localtime;
171 my $conninfo = '';
172 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
173 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
174
175 print qq{
176 -- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
177 -- Performed by the program: $0
178 -- Version: $VERSION (subversion }.q{$LastChangedRevision$}.qq{)
179 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
180 --
181 -- This file was created: $now
182 -- Executable used: $MYSQLDUMP
183 -- Connection information:
184 -- database: $MYSQLDB
185 -- user: $MYSQLUSER$conninfo
186
187 -- This file can be imported manually with psql like so:
188 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
189 -- This will overwrite any existing MediaWiki information, so be careful
190
191 };
192
193 ## psql specific stuff
194 print q{
195 \\set ON_ERROR_STOP
196 BEGIN;
197 SET client_min_messages = 'WARNING';
198 SET timezone = 'GMT';
199 };
200
201 warn qq{Reading in the Postgres schema information\n} if $verbose;
202 open my $schema, '<', $PG_SCHEMA
203 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
204 my $t;
205 while (<$schema>) {
206 if (/CREATE TABLE\s+(\S+)/) {
207 $t = $1;
208 $table{$t}={};
209 $verbose > 1 and warn qq{ Found table $t\n};
210 }
211 elsif (/^ +(\w+)\s+TIMESTAMP/) {
212 $tz{$t}{$1}++;
213 $verbose > 1 and warn qq{ Got a timestamp for column $1\n};
214 }
215 elsif (/REFERENCES\s*([^( ]+)/) {
216 my $ref = $1;
217 exists $table{$ref} or die qq{No parent table $ref found for $t\n};
218 $table{$t}{$ref}++;
219 }
220 }
221 close $schema or die qq{Could not close "$PG_SCHEMA": $!\n};
222
223 ## Read in special cases and table/version information
224 $verbose and warn qq{Reading in schema exception information\n};
225 my %version_tables;
226 while (<DATA>) {
227 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
228 my $list = join '|' => split /\s+/ => $2;
229 $version_tables{$1} = qr{\b$list\b};
230 next;
231 }
232 next unless /^(\w+)\s*(.*)/;
233 $special{$1} = $2||'';
234 $special{$2} = $1 if length $2;
235 }
236
237 ## Determine the order of tables based on foreign key constraints
238 $verbose and warn qq{Figuring out order of tables to dump\n};
239 my %dumped;
240 my $bail = 0;
241 {
242 my $found=0;
243 T: for my $t (sort keys %table) {
244 next if exists $dumped{$t} and $dumped{$t} >= 1;
245 $found=1;
246 for my $dep (sort keys %{$table{$t}}) {
247 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
248 }
249 $dumped{$t} = -1 if ! exists $dumped{$t};
250 ## Skip certain tables that are not imported
251 next if exists $special{$t} and !$special{$t};
252 push @torder, $special{$t} || $t;
253 }
254 last if !$found;
255 push @torder, '---';
256 for (values %dumped) { $_+=2; }
257 die "Too many loops!\n" if $bail++ > 1000;
258 redo;
259 }
260
261 ## Prepare the Postgres database for the move
262 $verbose and warn qq{Writing Postgres transformation information\n};
263
264 print "\n-- Empty out all existing tables\n";
265 $verbose and warn qq{Writing truncates to empty existing tables\n};
266
267
268 for my $t (@torder, 'objectcache', 'querycache') {
269 next if $t eq '---';
270 my $tname = $special{$t}||$t;
271 printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
272 }
273 print "\n\n";
274
275 print qq{-- Temporarily rename pagecontent to "text"\n};
276 print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n};
277
278 print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
279 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
280
281 print "-- Changing all timestamp fields to handle raw integers\n";
282 for my $t (sort keys %tz) {
283 next if $t eq 'archive2';
284 for my $c (sort keys %{$tz{$t}}) {
285 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
286 }
287 }
288 print "\n";
289
290 print q{
291 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
292 };
293
294 ## If we have a table _prefix, we need to temporarily rename all of our Postgres
295 ## tables temporarily for the import. Perhaps consider making this an auto-schema
296 ## thing in the future.
297 if (length $table_prefix) {
298 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
299 for my $t (@torder) {
300 next if $t eq '---';
301 my $tname = $special{$t}||$t;
302 printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"};
303 }
304 }
305
306
307 ## Try and dump the ill-named "user" table:
308 ## We do this table alone because "user" is a reserved word.
309 print q{
310
311 SET escape_string_warning TO 'off';
312 \\o /dev/null
313
314 -- Postgres uses a table name of "mwuser" instead of "user"
315
316 -- Create a dummy user to satisfy fk contraints especially with revisions
317 SELECT setval('user_user_id_seq',0,'false');
318 INSERT INTO mwuser
319 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
320
321 };
322
323 push @MYSQLDUMPARGS, '--no-create-info';
324
325 $verbose and warn qq{Dumping "user" table\n};
326 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
327 my $usertable = "${table_prefix}user";
328 open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
329 ## Unfortunately, there is no easy way to catch errors
330 my $numusers = 0;
331 while (<$mfork>) {
332 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
333 }
334 close $mfork;
335 if ($numusers < 1) {
336 warn qq{No users found, probably a connection error.\n};
337 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
338 close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
339 exit;
340 }
341 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
342
343 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
344
345 ## Dump the rest of the tables, in chunks based on constraints
346 ## We do not need the user table:
347 my @dumplist = grep { $_ ne 'user'} @torder;
348 my @alist;
349 {
350 undef @alist;
351 PICKATABLE: {
352 my $tname = shift @dumplist;
353 ## XXX Make this dynamic below
354 for my $ver (sort {$b <=> $a } keys %version_tables) {
355 redo PICKATABLE if $tname =~ $version_tables{$ver};
356 }
357 $tname = "${table_prefix}$tname" if length $table_prefix;
358 next if $tname !~ /^\w/;
359 push @alist, $tname;
360 $verbose and warn " $tname...\n";
361 pop @alist and last if index($alist[-1],'---') >= 0;
362 redo if @dumplist;
363 }
364
365 ## Dump everything else
366 open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
367 print while <$mfork2>;
368 close $mfork2;
369 warn qq{Finished dumping from MySQL\n} if $verbose;
370
371 redo if @dumplist;
372 }
373
374 warn qq{Writing information to return Postgres database to normal\n} if $verbose;
375 print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
376 print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
377 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
378
379 ## Return tables to their original names if a table prefix was used.
380 if (length $table_prefix) {
381 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
382 my $maxsize = 18;
383 for (@torder) {
384 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
385 }
386 for my $t (@torder) {
387 next if $t eq '---' or $t eq 'text';
388 my $tname = $special{$t}||$t;
389 printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"};
390 }
391 }
392
393 print qq{\n\n--Returning timestamps to normal\n};
394 for my $t (sort keys %tz) {
395 next if $t eq 'archive2';
396 for my $c (sort keys %{$tz{$t}}) {
397 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
398 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
399 }
400 }
401
402 ## Reset sequences
403 print q{
404 SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive;
405 SELECT setval('ipblocks_ipb_id_val', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
406 SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job;
407 SELECT setval('log_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging;
408 SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page;
409 SELECT setval('pr_id_val', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions;
410 SELECT setval('rc_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges;
411 SELECT setval('rev_rev_id_val', 1+coalesce(max(rev_id) ,0),false) FROM revision;
412 SELECT setval('text_old_id_val', 1+coalesce(max(old_id) ,0),false) FROM pagecontent;
413 SELECT setval('trackbacks_tb_id_seq', 1+coalesce(max(tb_id) ,0),false) FROM trackbacks;
414 SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser;
415 };
416
417 ## Finally, make a record in the mediawiki_version table about this import
418 print qq{
419 INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??',
420 'Imported from file created on $now. Old version: $current_version');
421 };
422
423 print "COMMIT;\n\\o\n\n-- End of dump\n\n";
424 select $oldselect;
425 close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
426 exit;
427
428
429 __DATA__
430 ## Known remappings: either indicate the MySQL name,
431 ## or leave blank if it should be skipped
432 pagecontent text
433 mwuser user
434 mediawiki_version
435 archive2
436 profiling
437 objectcache
438
439 ## Which tables to ignore depending on the version
440 VERSION 1.5: trackback
441 VERSION 1.6: externallinks job templatelinks transcache
442 VERSION 1.7: filearchive langlinks querycache_info
443 VERSION 1.9: querycachetwo page_restrictions redirect
444