3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
6 ## NOTE: It is probably easier to dump your wiki using maintenance/dumpBackup.php
7 ## and then import it with maintenance/importDump.php
9 ## If having UTF-8 problems, there are reports that adding --compatible=postgresql
17 use vars
qw(%table %tz %special @torder $COM);
20 ## The following options can be changed via command line arguments:
24 ## If the following are zero-length, we omit their arguments entirely:
26 my $MYSQLPASSWORD = '';
29 ## Name of the dump file created
30 my $MYSQLDUMPFILE = 'mediawiki_upgrade.pg';
32 ## How verbose should this script be (0, 1, or 2)
38 Usage: $0 --db=<dbname> --user=<user> [OPTION]...
39 Example: $0 --db=wikidb --user=wikiuser --pass=sushi
41 Converts a MediaWiki schema from MySQL to Postgres
43 db Name of the MySQL database
44 user MySQL database username
45 pass MySQL database password
46 host MySQL database host
47 socket MySQL database socket
48 verbose Verbosity, increases with multiple uses
54 'user=s' => \$MYSQLUSER,
55 'pass=s' => \$MYSQLPASSWORD,
56 'host=s' => \$MYSQLHOST,
57 'socket=s' => \$MYSQLSOCKET,
58 'verbose+' => \$verbose,
64 or ! length $MYSQLUSER
67 ## The Postgres schema file: should not be changed
68 my $PG_SCHEMA = 'tables.sql';
70 ## What version we default to when we can't parse the old schema
71 my $MW_DEFAULT_VERSION = 110;
73 ## Try and find a working version of mysqldump
74 $verbose and warn "Locating the mysqldump executable\n";
75 my @MYSQLDUMP = ('/usr/local/bin/mysqldump', '/usr/bin/mysqldump');
77 for my $mytry (@MYSQLDUMP) {
79 -x $mytry or die qq{Not an executable file: "$mytry"\n};
80 my $version = qx{$mytry -V};
81 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
84 $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
86 ## Flags we use for mysqldump
87 my @MYSQLDUMPARGS = qw(
90 --skip-extended-insert
100 $verbose and warn "Checking that mysqldump can handle our flags\n";
101 ## Make sure this version can handle all the flags we want.
102 ## Combine with user dump below
103 my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS;
104 ## Argh. Any way to make this work on Win32?
105 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
106 if ($version =~ /unknown option/) {
107 die qq{Sorry
, you need to
use a newer version of the mysqldump program than the one at
"$MYSQLDUMP"\n};
110 push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
111 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
112 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
114 ## Open the dump file to hold the mysqldump output
115 open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could
not open "$MYSQLDUMPFILE": $!\n};
116 print qq{Writing file
"$MYSQLDUMPFILE"\n};
118 open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB;
119 my $oldselect = select $mdump;
121 print while <$mfork2>;
123 ## Slurp in the current schema
128 $current_schema = <$mdump>;
133 warn qq{Trying to determine database version
...\n} if $verbose;
135 my $current_version = 0;
136 if ($current_schema =~ /CREATE TABLE \S+cur /) {
137 $current_version = 103;
139 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
140 $current_version = 104;
142 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
143 $current_version = 105;
145 elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
146 $current_version = 106;
148 elsif ($current_schema !~ /ipb_auto tinyint/) {
149 $current_version = 107;
151 elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
152 $current_version = 108;
154 elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
155 $current_version = 109;
158 $current_version = $MW_DEFAULT_VERSION;
161 if (!$current_version) {
162 warn qq{WARNING
! Could
not figure out the old version
, assuming MediaWiki
$MW_DEFAULT_VERSION\n};
163 $current_version = $MW_DEFAULT_VERSION;
166 ## Check for a table prefix:
167 my $table_prefix = '';
168 if ($current_schema =~ /CREATE TABLE (\S+)querycache /) {
172 warn qq{Old schema is from MediaWiki version
$current_version\n} if $verbose;
173 warn qq{Table prefix is
"$table_prefix"\n} if $verbose and length $table_prefix;
175 $verbose and warn qq{Writing file
"$MYSQLDUMPFILE"\n};
176 my $now = scalar localtime;
178 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
179 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
182 -- Dump of MySQL Mediawiki tables
for import into a Postgres Mediawiki schema
183 -- Performed by the program
: $0
184 -- Version
: $VERSION (subversion
}.q{$LastChangedRevision$}.qq{)
185 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
187 -- This file was created: $now
188 -- Executable used: $MYSQLDUMP
189 -- Connection information:
190 -- database: $MYSQLDB
191 -- user: $MYSQLUSER$conninfo
193 -- This file can be imported manually with psql like so:
194 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
195 -- This will overwrite any existing MediaWiki information, so be careful
199 ## psql specific stuff
203 SET client_min_messages = 'WARNING';
204 SET timezone = 'GMT';
205 SET DateStyle = 'ISO, YMD';
208 warn qq{Reading in the Postgres schema information\n} if $verbose;
209 open my $schema, '<', $PG_SCHEMA
210 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
213 if (/CREATE TABLE\s+(\S+)/) {
216 $verbose > 1 and warn qq{ Found table $t\n};
218 elsif (/^ +(\w+)\s+TIMESTAMP/) {
220 $verbose > 1 and warn qq{ Got a timestamp for column $1\n};
222 elsif (/REFERENCES\s*([^( ]+)/) {
224 exists $table{$ref} or die qq{No parent table $ref found for $t\n};
228 close $schema or die qq{Could not close "$PG_SCHEMA": $!\n};
230 ## Read in special cases and table/version information
231 $verbose and warn qq{Reading in schema exception information\n};
234 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
235 my $list = join '|' => split /\s+/ => $2;
236 $version_tables{$1} = qr{\b$list\b};
239 next unless /^(\w+)\s*(.*)/;
240 $special{$1} = $2||'';
241 $special{$2} = $1 if length $2;
244 ## Determine the order of tables based on foreign key constraints
245 $verbose and warn qq{Figuring out order of tables to dump\n};
250 T: for my $t (sort keys %table) {
251 next if exists $dumped{$t} and $dumped{$t} >= 1;
253 for my $dep (sort keys %{$table{$t}}) {
254 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
256 $dumped{$t} = -1 if ! exists $dumped{$t};
257 ## Skip certain tables that are not imported
258 next if exists $special{$t} and !$special{$t};
259 push @torder, $special{$t} || $t;
263 for (values %dumped) { $_+=2; }
264 die "Too many loops!\n" if $bail++ > 1000;
268 ## Prepare the Postgres database for the move
269 $verbose and warn qq{Writing Postgres transformation information\n};
271 print "\n-- Empty out all existing tables\n";
272 $verbose and warn qq{Writing truncates to empty existing tables\n};
275 for my $t (@torder, 'objectcache', 'querycache') {
277 my $tname = $special{$t}||$t;
278 printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
282 print qq{-- Temporarily rename pagecontent to "${table_prefix}text"\n};
283 print qq{ALTER TABLE pagecontent RENAME TO "${table_prefix}text";\n\n};
285 print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
286 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
288 print "-- Changing all timestamp fields to handle raw integers\n";
289 for my $t (sort keys %tz) {
290 next if $t eq 'archive2';
291 for my $c (sort keys %{$tz{$t}}) {
292 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
298 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
301 ## If we have a table _prefix, we need to temporarily rename all of our Postgres
302 ## tables temporarily for the import. Perhaps consider making this an auto-schema
303 ## thing in the future.
304 if (length $table_prefix) {
305 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix
"$table_prefix"\n\n};
306 for my $t (@torder) {
307 next if $t eq '---' or $t eq 'text' or $t eq 'user';
308 my $tname = $special{$t}||$t;
309 printf qq{ALTER TABLE
%-18s RENAME TO
"${table_prefix}$tname";\n}, qq{"$tname"};
314 ## Try and dump the ill-named "user" table:
315 ## We do this table alone because "user" is a reserved word.
318 SET escape_string_warning TO 'off';
321 -- Postgres uses a table name of "mwuser" instead of "user"
323 -- Create a dummy user to satisfy fk contraints especially with revisions
324 SELECT setval('user_user_id_seq',0,'false');
326 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
330 push @MYSQLDUMPARGS, '--no-create-info';
332 $verbose and warn qq{Dumping "user" table\n};
333 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
334 my $usertable = "${table_prefix}user";
335 open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
336 ## Unfortunately, there is no easy way to catch errors
339 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
343 warn qq{No users found, probably a connection error.\n};
344 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
345 close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
348 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
350 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
352 ## Dump the rest of the tables, in chunks based on constraints
353 ## We do not need the user table:
354 my @dumplist = grep { $_ ne 'user'} @torder;
359 my $tname = shift @dumplist;
360 ## XXX Make this dynamic below
361 for my $ver (sort {$b <=> $a } keys %version_tables) {
362 redo PICKATABLE if $tname =~ $version_tables{$ver};
364 $tname = "${table_prefix}$tname" if length $table_prefix;
365 next if $tname !~ /^\w/;
367 $verbose and warn " $tname...\n";
368 pop @alist and last if index($alist[-1],'---') >= 0;
372 ## Dump everything else
373 open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
374 print while <$mfork2>;
376 warn qq{Finished dumping from MySQL\n} if $verbose;
381 warn qq{Writing information to return Postgres database to normal\n} if $verbose;
382 print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
383 print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
384 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
386 ## Return tables to their original names if a table prefix was used.
387 if (length $table_prefix) {
388 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
391 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
393 for my $t (@torder) {
394 next if $t eq '---' or $t eq 'text' or $t eq 'user';
395 my $tname = $special{$t}||$t;
396 printf qq{ALTER TABLE %*s RENAME TO "$tname";\n}, $maxsize+1, qq{"${table_prefix}$tname"};
400 print qq{\n\n--Returning timestamps to normal\n};
401 for my $t (sort keys %tz) {
402 next if $t eq 'archive2';
403 for my $c (sort keys %{$tz{$t}}) {
404 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
405 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
411 SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive;
412 SELECT setval('ipblocks_ipb_id_seq', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
413 SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job;
414 SELECT setval('logging_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging;
415 SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page;
416 SELECT setval('page_restrictions_pr_id_seq', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions;
417 SELECT setval('recentchanges_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges;
418 SELECT setval('revision_rev_id_seq', 1+coalesce(max(rev_id) ,0),false) FROM revision;
419 SELECT setval('text_old_id_seq', 1+coalesce(max(old_id) ,0),false) FROM pagecontent;
420 SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser;
423 print "COMMIT;\n\\o\n\n-- End of dump\n\n";
425 close $mdump or die qq{Could
not close "$MYSQLDUMPFILE": $!\n};
430 ## Known remappings: either indicate the MySQL name,
431 ## or leave blank if it should be skipped
438 ## Which tables to ignore depending on the version
439 VERSION
1.6: externallinks job templatelinks transcache
440 VERSION
1.7: filearchive langlinks querycache_info
441 VERSION
1.9: querycachetwo page_restrictions redirect