3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
11 use vars
qw(%table %tz %special @torder $COM);
14 ## The following options can be changed via command line arguments:
18 ## If the following are zero-length, we omit their arguments entirely:
20 my $MYSQLPASSWORD = '';
23 ## Name of the dump file created
24 my $MYSQLDUMPFILE = "mediawiki_upgrade.pg";
26 ## How verbose should this script be (0, 1, or 2)
32 Usage: $0 --db=<dbname> --user=<user> [OPTION]...
33 Example: $0 --db=wikidb --user=wikiuser --pass=sushi
35 Converts a MediaWiki schema from MySQL to Postgres
37 db Name of the MySQL database
38 user MySQL database username
39 pass MySQL database password
40 host MySQL database host
41 socket MySQL database socket
42 verbose Verbosity, increases with multiple uses
48 "user=s" => \$MYSQLUSER,
49 "pass=s" => \$MYSQLPASSWORD,
50 "host=s" => \$MYSQLHOST,
51 "socket=s" => \$MYSQLSOCKET,
52 "verbose+" => \$verbose,
58 or ! length $MYSQLUSER
61 ## The Postgres schema file: should not be changed
62 my $PG_SCHEMA = "tables.sql";
64 ## What version we default to when we can't parse the old schema
65 my $MW_DEFAULT_VERSION = '1.10';
67 ## Try and find a working version of mysqldump
68 $verbose and warn "Locating the mysqldump executable\n";
69 my @MYSQLDUMP = ("/usr/local/bin/mysqldump", "/usr/bin/mysqldump");
71 for my $mytry (@MYSQLDUMP) {
73 -x $mytry or die qq{Not an executable file: "$mytry"\n};
74 my $version = qx{$mytry -V};
75 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
78 $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
80 ## Flags we use for mysqldump
81 my @MYSQLDUMPARGS = qw(
84 --skip-extended-insert
94 $verbose and warn "Checking that mysqldump can handle our flags\n";
95 ## Make sure this version can handle all the flags we want.
96 ## Combine with user dump below
97 my $MYSQLDUMPARGS = join " " => @MYSQLDUMPARGS;
98 ## Argh. Any way to make this work on Win32?
99 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
100 if ($version =~ /unknown option/) {
101 die qq{Sorry
, you need to
use a newer version of the mysqldump program than the one at
"$MYSQLDUMP"\n};
104 push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
105 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
106 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
108 ## Open the dump file to hold the mysqldump output
109 open my $mdump, "+>", $MYSQLDUMPFILE or die qq{Could
not open "$MYSQLDUMPFILE": $!\n};
110 print qq{Writing file
"$MYSQLDUMPFILE"\n};
112 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, "--no-data", $MYSQLDB;
113 my $oldselect = select $mdump;
115 print while <$mfork2>;
117 ## Slurp in the current schema
122 $current_schema = <$mdump>;
127 warn qq{Trying to determine database version
...\n} if $verbose;
129 my $current_version = 0;
130 if ($current_schema =~ /CREATE TABLE \S+cur /) {
131 $current_version = '1.3';
133 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
134 $current_version = '1.4';
136 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
137 $current_version = '1.5';
139 elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
140 $current_version = '1.6';
142 elsif ($current_schema !~ /ipb_auto tinyint/) {
143 $current_version = '1.7';
145 elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
146 $current_version = '1.8';
148 elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
149 $current_version = '1.9';
152 $current_version = '$MW_DEFAULT_VERSION';
155 if (!$current_version) {
156 warn qq{WARNING
! Could
not figure out the old version
, assuming MediaWiki
$MW_DEFAULT_VERSION\n};
157 $current_version = $MW_DEFAULT_VERSION;
160 ## Check for a table prefix:
161 my $table_prefix = '';
162 if ($current_version =~ /CREATE TABLE (\S+)archive /) {
166 warn qq{Old schema is from MediaWiki version
$current_version\n} if $verbose;
167 warn qq{Table prefix is
"$table_prefix"\n} if $verbose and length $table_prefix;
169 $verbose and warn qq{Writing file
"$MYSQLDUMPFILE"\n};
170 my $now = scalar localtime();
172 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
173 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
176 -- Dump of MySQL Mediawiki tables
for import into a Postgres Mediawiki schema
177 -- Performed by the program
: $0
178 -- Version
: $VERSION (subversion
}.q{$LastChangedRevision$}.qq{)
179 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
181 -- This file was created: $now
182 -- Executable used: $MYSQLDUMP
183 -- Connection information:
184 -- database: $MYSQLDB
185 -- user: $MYSQLUSER$conninfo
187 -- This file can be imported manually with psql like so:
188 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
189 -- This will overwrite any existing MediaWiki information, so be careful
193 ## psql specific stuff
196 SET client_min_messages = 'WARNING';
199 warn qq{Reading in the Postgres schema information\n} if $verbose;
200 open my $schema, "<", $PG_SCHEMA
201 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
204 if (/CREATE TABLE\s+(\S+)/) {
208 elsif (/^ +(\w+)\s+TIMESTAMP/) {
211 elsif (/REFERENCES\s*([^( ]+)/) {
213 exists $table{$ref} or die qq{No parent table $ref found for $t\n};
219 ## Read in special cases and table/version information
220 $verbose and warn qq{Reading in schema exception information\n};
223 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
224 my $list = join '|' => split /\s+/ => $2;
225 $version_tables{$1} = qr{\b$list\b};
228 next unless /^(\w+)\s*(.*)/;
229 $special{$1} = $2||'';
230 $special{$2} = $1 if length $2;
233 ## Determine the order of tables based on foreign key constraints
234 $verbose and warn qq{Figuring out order of tables to dump\n};
239 T: for my $t (sort keys %table) {
240 next if exists $dumped{$t} and $dumped{$t} >= 1;
242 for my $dep (sort keys %{$table{$t}}) {
243 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
245 $dumped{$t} = -1 if ! exists $dumped{$t};
246 ## Skip certain tables that are not imported
247 next if exists $special{$t} and !$special{$t};
248 push @torder, $special{$t} || $t;
252 for (values %dumped) { $_+=2; }
253 die "Too many loops!\n" if $bail++ > 1000;
257 ## Prepare the Postgres database for the move
258 $verbose and warn qq{Writing Postgres transformation information\n};
260 print "\n-- Empty out all existing tables\n";
261 $verbose and warn qq{Writing truncates to empty existing tables\n};
262 for my $t (@torder "objectcache", "querycache") {
264 my $tname = $special{$t}||$t;
265 printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
269 print qq{-- Temporarily rename pagecontent to "text"\n};
270 print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n};
272 print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
273 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
275 print "-- Changing all timestamp fields to handle raw integers\n";
276 for my $t (sort keys %tz) {
277 next if $t eq "archive2";
278 for my $c (sort keys %{$tz{$t}}) {
279 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
285 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
288 ## If we have a table _prefix, we need to temporarily rename all of our Postgres
289 ## tables temporarily for the import. Perhaps consider making this an auto-schema
290 ## thing in the future.
291 if (length $table_prefix) {
292 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
293 for my $t (@torder) {
295 my $tname = $special{$t}||$t;
296 printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"};
301 ## Try and dump the ill-named "user" table:
302 ## We do this table alone because "user" is a reserved word.
305 SET escape_string_warning TO 'off';
308 -- Postgres uses a table name of "mwuser" instead of "user"
310 -- Create a dummy user to satisfy fk contraints especially with revisions
311 SELECT setval('user_user_id_seq',0,'false');
313 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
317 push @MYSQLDUMPARGS, "--no-create-info";
319 $verbose and warn qq{Dumping "user" table\n};
320 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
321 my $usertable = "${table_prefix}user";
322 open my $mfork, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
323 ## Unfortunately, there is no easy way to catch errors
326 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
330 warn qq{No users found, probably a connection error.\n};
331 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
335 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
337 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
339 ## Dump the rest of the tables, in chunks based on constraints
340 ## We do not need the user table:
341 my @dumplist = grep { $_ ne 'user'} @torder;
346 my $tname = shift @dumplist;
347 ## XXX Make this dynamic below
348 for my $ver (sort {$b <=> $a } keys %version_tables) {
349 redo PICKATABLE if $tname =~ $version_tables{$ver};
351 $tname = "${table_prefix}$tname" if length $table_prefix;
352 next if $tname !~ /^\w/;
354 $verbose and warn " $tname...\n";
355 pop @alist and last if index($alist[-1],'---') >= 0;
359 ## Dump everything else
360 open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
361 print while <$mfork2>;
363 warn qq{Finished dumping from MySQL\n} if $verbose;
368 warn qq{Writing information to return Postgres database to normal\n} if $verbose;
369 print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
370 print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
371 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
373 ## Return tables to their original names if a table prefix was used.
374 if (length $table_prefix) {
375 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
378 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
380 for my $t (@torder) {
381 next if $t eq '---' or $t eq 'text';
382 my $tname = $special{$t}||$t;
383 printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"};
387 print qq{\n\n--Returning timestamps to normal\n};
388 for my $t (sort keys %tz) {
389 next if $t eq "archive2";
390 for my $c (sort keys %{$tz{$t}}) {
391 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
392 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
398 SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive;
399 SELECT setval('ipblocks_ipb_id_val', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
400 SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job;
401 SELECT setval('log_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging;
402 SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page;
403 SELECT setval('pr_id_val', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions;
404 SELECT setval('rc_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges;
405 SELECT setval('rev_rev_id_val', 1+coalesce(max(rev_id) ,0),false) FROM revision;
406 SELECT setval('text_old_id_val', 1+coalesce(max(old_id) ,0),false) FROM pagecontent;
407 SELECT setval('trackbacks_tb_id_seq', 1+coalesce(max(tb_id) ,0),false) FROM trackbacks;
408 SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser;
411 ## Finally, make a record in the mediawiki_version table about this import
413 INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??',
414 'Imported from file created on $now. Old version: $current_version');
417 print "\\o\n\n-- End of dump\n\n";
424 ## Known remappings: either indicate the MySQL name,
425 ## or leave blank if it should be skipped
433 ## Which tables to ignore depending on the version
434 VERSION 1.5: trackback
435 VERSION 1.6: externallinks job templatelinks transcache
436 VERSION 1.7: filearchive langlinks querycache_info
437 VERSION 1.9: querycachetwo page_restrictions redirect