3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
11 use vars
qw(%table %tz %special @torder $COM);
14 ## The following options can be changed via command line arguments:
18 ## If the following are zero-length, we omit their arguments entirely:
20 my $MYSQLPASSWORD = '';
23 ## Name of the dump file created
24 my $MYSQLDUMPFILE = 'mediawiki_upgrade.pg';
26 ## How verbose should this script be (0, 1, or 2)
32 Usage: $0 --db=<dbname> --user=<user> [OPTION]...
33 Example: $0 --db=wikidb --user=wikiuser --pass=sushi
35 Converts a MediaWiki schema from MySQL to Postgres
37 db Name of the MySQL database
38 user MySQL database username
39 pass MySQL database password
40 host MySQL database host
41 socket MySQL database socket
42 verbose Verbosity, increases with multiple uses
48 'user=s' => \$MYSQLUSER,
49 'pass=s' => \$MYSQLPASSWORD,
50 'host=s' => \$MYSQLHOST,
51 'socket=s' => \$MYSQLSOCKET,
52 'verbose+' => \$verbose,
58 or ! length $MYSQLUSER
61 ## The Postgres schema file: should not be changed
62 my $PG_SCHEMA = 'tables.sql';
64 ## What version we default to when we can't parse the old schema
65 my $MW_DEFAULT_VERSION = 110;
67 ## Try and find a working version of mysqldump
68 $verbose and warn "Locating the mysqldump executable\n";
69 my @MYSQLDUMP = ('/usr/local/bin/mysqldump', '/usr/bin/mysqldump');
71 for my $mytry (@MYSQLDUMP) {
73 -x $mytry or die qq{Not an executable file: "$mytry"\n};
74 my $version = qx{$mytry -V};
75 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
78 $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
80 ## Flags we use for mysqldump
81 my @MYSQLDUMPARGS = qw(
84 --skip-extended-insert
94 $verbose and warn "Checking that mysqldump can handle our flags\n";
95 ## Make sure this version can handle all the flags we want.
96 ## Combine with user dump below
97 my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS;
98 ## Argh. Any way to make this work on Win32?
99 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
100 if ($version =~ /unknown option/) {
101 die qq{Sorry
, you need to
use a newer version of the mysqldump program than the one at
"$MYSQLDUMP"\n};
104 push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
105 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
106 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
108 ## Open the dump file to hold the mysqldump output
109 open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could
not open "$MYSQLDUMPFILE": $!\n};
110 print qq{Writing file
"$MYSQLDUMPFILE"\n};
112 open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB;
113 my $oldselect = select $mdump;
115 print while <$mfork2>;
117 ## Slurp in the current schema
122 $current_schema = <$mdump>;
127 warn qq{Trying to determine database version
...\n} if $verbose;
129 my $current_version = 0;
130 if ($current_schema =~ /CREATE TABLE \S+cur /) {
131 $current_version = 103;
133 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
134 $current_version = 104;
136 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
137 $current_version = 105;
139 elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
140 $current_version = 106;
142 elsif ($current_schema !~ /ipb_auto tinyint/) {
143 $current_version = 107;
145 elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
146 $current_version = 108;
148 elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
149 $current_version = 109;
152 $current_version = $MW_DEFAULT_VERSION;
155 if (!$current_version) {
156 warn qq{WARNING
! Could
not figure out the old version
, assuming MediaWiki
$MW_DEFAULT_VERSION\n};
157 $current_version = $MW_DEFAULT_VERSION;
160 ## Check for a table prefix:
161 my $table_prefix = '';
162 if ($current_schema =~ /CREATE TABLE (\S+)querycache /) {
166 warn qq{Old schema is from MediaWiki version
$current_version\n} if $verbose;
167 warn qq{Table prefix is
"$table_prefix"\n} if $verbose and length $table_prefix;
169 $verbose and warn qq{Writing file
"$MYSQLDUMPFILE"\n};
170 my $now = scalar localtime;
172 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
173 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
176 -- Dump of MySQL Mediawiki tables
for import into a Postgres Mediawiki schema
177 -- Performed by the program
: $0
178 -- Version
: $VERSION (subversion
}.q{$LastChangedRevision$}.qq{)
179 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
181 -- This file was created: $now
182 -- Executable used: $MYSQLDUMP
183 -- Connection information:
184 -- database: $MYSQLDB
185 -- user: $MYSQLUSER$conninfo
187 -- This file can be imported manually with psql like so:
188 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
189 -- This will overwrite any existing MediaWiki information, so be careful
193 ## psql specific stuff
197 SET client_min_messages = 'WARNING';
198 SET timezone = 'GMT';
201 warn qq{Reading in the Postgres schema information\n} if $verbose;
202 open my $schema, '<', $PG_SCHEMA
203 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
206 if (/CREATE TABLE\s+(\S+)/) {
209 $verbose > 1 and warn qq{ Found table $t\n};
211 elsif (/^ +(\w+)\s+TIMESTAMP/) {
213 $verbose > 1 and warn qq{ Got a timestamp for column $1\n};
215 elsif (/REFERENCES\s*([^( ]+)/) {
217 exists $table{$ref} or die qq{No parent table $ref found for $t\n};
221 close $schema or die qq{Could not close "$PG_SCHEMA": $!\n};
223 ## Read in special cases and table/version information
224 $verbose and warn qq{Reading in schema exception information\n};
227 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
228 my $list = join '|' => split /\s+/ => $2;
229 $version_tables{$1} = qr{\b$list\b};
232 next unless /^(\w+)\s*(.*)/;
233 $special{$1} = $2||'';
234 $special{$2} = $1 if length $2;
237 ## Determine the order of tables based on foreign key constraints
238 $verbose and warn qq{Figuring out order of tables to dump\n};
243 T: for my $t (sort keys %table) {
244 next if exists $dumped{$t} and $dumped{$t} >= 1;
246 for my $dep (sort keys %{$table{$t}}) {
247 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
249 $dumped{$t} = -1 if ! exists $dumped{$t};
250 ## Skip certain tables that are not imported
251 next if exists $special{$t} and !$special{$t};
252 push @torder, $special{$t} || $t;
256 for (values %dumped) { $_+=2; }
257 die "Too many loops!\n" if $bail++ > 1000;
261 ## Prepare the Postgres database for the move
262 $verbose and warn qq{Writing Postgres transformation information\n};
264 print "\n-- Empty out all existing tables\n";
265 $verbose and warn qq{Writing truncates to empty existing tables\n};
268 for my $t (@torder, 'objectcache', 'querycache') {
270 my $tname = $special{$t}||$t;
271 printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
275 print qq{-- Temporarily rename pagecontent to "text"\n};
276 print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n};
278 print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
279 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
281 print "-- Changing all timestamp fields to handle raw integers\n";
282 for my $t (sort keys %tz) {
283 next if $t eq 'archive2';
284 for my $c (sort keys %{$tz{$t}}) {
285 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
291 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
294 ## If we have a table _prefix, we need to temporarily rename all of our Postgres
295 ## tables temporarily for the import. Perhaps consider making this an auto-schema
296 ## thing in the future.
297 if (length $table_prefix) {
298 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix
"$table_prefix"\n\n};
299 for my $t (@torder) {
301 my $tname = $special{$t}||$t;
302 printf qq{ALTER TABLE
%-18s RENAME TO
"${table_prefix}$tname"\n}, qq{"$tname"};
307 ## Try and dump the ill-named "user" table:
308 ## We do this table alone because "user" is a reserved word.
311 SET escape_string_warning TO 'off';
314 -- Postgres uses a table name of "mwuser" instead of "user"
316 -- Create a dummy user to satisfy fk contraints especially with revisions
317 SELECT setval('user_user_id_seq',0,'false');
319 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
323 push @MYSQLDUMPARGS, '--no-create-info';
325 $verbose and warn qq{Dumping "user" table\n};
326 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
327 my $usertable = "${table_prefix}user";
328 open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
329 ## Unfortunately, there is no easy way to catch errors
332 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
336 warn qq{No users found, probably a connection error.\n};
337 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
338 close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
341 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
343 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
345 ## Dump the rest of the tables, in chunks based on constraints
346 ## We do not need the user table:
347 my @dumplist = grep { $_ ne 'user'} @torder;
352 my $tname = shift @dumplist;
353 ## XXX Make this dynamic below
354 for my $ver (sort {$b <=> $a } keys %version_tables) {
355 redo PICKATABLE if $tname =~ $version_tables{$ver};
357 $tname = "${table_prefix}$tname" if length $table_prefix;
358 next if $tname !~ /^\w/;
360 $verbose and warn " $tname...\n";
361 pop @alist and last if index($alist[-1],'---') >= 0;
365 ## Dump everything else
366 open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
367 print while <$mfork2>;
369 warn qq{Finished dumping from MySQL\n} if $verbose;
374 warn qq{Writing information to return Postgres database to normal\n} if $verbose;
375 print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
376 print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
377 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
379 ## Return tables to their original names if a table prefix was used.
380 if (length $table_prefix) {
381 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
384 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
386 for my $t (@torder) {
387 next if $t eq '---' or $t eq 'text';
388 my $tname = $special{$t}||$t;
389 printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"};
393 print qq{\n\n--Returning timestamps to normal\n};
394 for my $t (sort keys %tz) {
395 next if $t eq 'archive2';
396 for my $c (sort keys %{$tz{$t}}) {
397 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
398 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
404 SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive;
405 SELECT setval('ipblocks_ipb_id_val', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
406 SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job;
407 SELECT setval('log_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging;
408 SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page;
409 SELECT setval('pr_id_val', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions;
410 SELECT setval('rc_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges;
411 SELECT setval('rev_rev_id_val', 1+coalesce(max(rev_id) ,0),false) FROM revision;
412 SELECT setval('text_old_id_val', 1+coalesce(max(old_id) ,0),false) FROM pagecontent;
413 SELECT setval('trackbacks_tb_id_seq', 1+coalesce(max(tb_id) ,0),false) FROM trackbacks;
414 SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser;
417 ## Finally, make a record in the mediawiki_version table about this import
419 INSERT INTO mediawiki_version
(type
,mw_version
,notes
) VALUES
('MySQL import','??',
420 'Imported from file created on $now. Old version: $current_version');
423 print "COMMIT;\n\\o\n\n-- End of dump\n\n";
425 close $mdump or die qq{Could
not close "$MYSQLDUMPFILE": $!\n};
430 ## Known remappings: either indicate the MySQL name,
431 ## or leave blank if it should be skipped
439 ## Which tables to ignore depending on the version
440 VERSION
1.5: trackback
441 VERSION
1.6: externallinks job templatelinks transcache
442 VERSION
1.7: filearchive langlinks querycache_info
443 VERSION
1.9: querycachetwo page_restrictions redirect