Various additions and fixes
[mediawiki.git] / maintenance / archives / convertdb.php
blob31b77a9093a18f82d9e8aa8f9c06d4f04cdbd7b7
<?php

# This script is obsolete: announce that and stop before touching the
# database. Everything after the exit() below is unreachable and is kept
# only as reference material.
print "This script is obsolete!\n";
print "It is retained in the source here in case some of its
code might be useful for ad-hoc conversion tasks, but it is
not maintained and probably won't even work as is.\n";
exit();

# Database conversion (from May 2002 format). Assumes that
# the old tables have been loaded into an empty database from
# dump files.

global $IP;
include_once( "../LocalSettings.php" );
include_once( "../AdminSettings.php" );
include_once( "$IP/Setup.php" );

$wgTitle = Title::newFromText( "Conversion script" );
include_once( "./rebuildLinks.inc" );
include_once( "./rebuildRecentchanges.inc" );
include_once( "./buildTables.inc" );
set_time_limit(0);

# Run as the administrative database user; the password comes from
# the $wgDBadminpassword global.
$wgDBuser = "wikiadmin";
$wgDBpassword = $wgDBadminpassword;
# $wgImageDirectory = "/usr/local/apache/htdocs/wikiimages";
$wgImageDirectory = "/usr/local/apache/htdocs/upload";
$wgMetaImageDirectory = "/usr/local/apache/htdocs-meta/upload";

# Move the old tables aside, then create and seed the new schema.
renameOldTables();
buildTables();
initializeTables();

# convertImageDirectories();
convertUserTable();
convertOldTable();
convertCurTable();

buildIndexes();

rebuildLinkTablesPass1();
rebuildLinkTablesPass2();

# This is kinda ugly, could be done cleaner
# (convertImageDirectories() reads the imagelinks table, so it runs
# only after the link-table rebuild above.)
convertImageDirectories();
#rebuildLinkTablesPass1();
#rebuildLinkTablesPass2();

removeOldTables();

refillRandom();
rebuildRecentChangesTable();

print "Done.\n";
exit();
58 ########## End of script, beginning of functions.
# Copy every account from old_user into the freshly built user table.
#
# Rows whose name is empty after fixUserName() are skipped. Rights and
# options are translated with fixUserRights() / fixUserOptions(), and the
# stored password is the md5() of the old password column. Rows are
# written in multi-row INSERT statements of up to ten accounts each.
# Both tables stay locked for the duration of the copy.
function convertUserTable()
{
	$converted = 0;
	print "Converting USER table.\n";

	wfQuery( "LOCK TABLES old_user READ, user WRITE" );

	$source = wfQuery( "SELECT user_id,user_name,user_rights,user_password," .
		"user_email,user_options,user_watch FROM old_user" );

	wfQuery( "DELETE FROM user" );

	# Statement currently being assembled; empty until the first valid row.
	$batch = "";
	while ( $old = mysql_fetch_object( $source ) ) {
		$name = addslashes( fixUserName( $old->user_name ) );
		if ( "" == $name ) {
			# Don't convert illegal names
			continue;
		}

		if ( 0 != ( $converted % 10 ) ) {
			# Still inside the current batch: append another value tuple.
			$batch .= ",";
		} else {
			# Start of a new batch: flush the previous one, if any.
			if ( 0 != $converted ) {
				wfQuery( $batch );
			}
			$batch = "INSERT INTO user (user_id,user_name,user_rights," .
				"user_password,user_newpassword,user_email,user_options," .
				"user_watch) VALUES ";
		}

		$options = addslashes( fixUserOptions( $old->user_options ) );
		$rights = addslashes( fixUserRights( $old->user_rights ) );
		$email = addslashes( $old->user_email );
		$password = addslashes( md5( $old->user_password ) );
		$watch = addslashes( $old->user_watch );

		$batch .= "({$old->user_id},'{$name}','{$rights}','{$password}',''," .
			"'{$email}','{$options}','{$watch}')";

		++$converted;
		if ( 0 == ( $converted % 1000 ) ) {
			print "$converted user records processed.\n";
		}
	}

	# Write whatever is left in the final, possibly partial, batch.
	if ( $batch ) {
		wfQuery( $batch );
	}

	print "$converted user records processed.\n";
	mysql_free_result( $source );

	wfQuery( "UNLOCK TABLES" );
}
111 # Convert May 2002 version of database into new format.
# Copy every row of old_cur into the new cur table, one INSERT per row,
# and then record the number of "countable" articles in site_stats.
#
# For each row the title is split into namespace and DB key via Title,
# media URLs in the text are rewritten by convertMediaLinks(), the search
# index columns are produced by indexTitle() / indexText(), and text that
# starts with "#redirect" is normalised by fixRedirect(). A row is counted
# as a good article when it is in namespace 0, is not a redirect, and its
# text contains a comma. cur_counter is always written as 0.
function convertCurTable()
{
	$processed = 0;
	$countables = 0;
	print "Converting CUR table.\n";

	wfQuery( "LOCK TABLES old_cur READ, cur WRITE, site_stats WRITE" );

	$source = wfQuery( "SELECT cur_id,cur_title,cur_text,cur_comment,cur_user," .
		"cur_timestamp,cur_minor_edit,cur_restrictions," .
		"cur_counter,cur_ind_title,cur_user_text FROM old_cur" );

	wfQuery( "DELETE FROM cur" );
	wfQuery( "DELETE FROM site_stats" );

	# Column list shared by every INSERT issued below.
	$insertHead = "INSERT INTO cur (cur_id,cur_namespace," .
		"cur_title,cur_text,cur_comment,cur_user," .
		"cur_timestamp,cur_minor_edit,cur_is_new," .
		"cur_restrictions,cur_counter,cur_ind_title," .
		"cur_ind_text,cur_is_redirect,cur_user_text) VALUES ";

	while ( $old = mysql_fetch_object( $source ) ) {
		$titleObj = Title::newFromDBkey( $old->cur_title );
		$title = addslashes( $titleObj->getDBkey() );
		$ns = $titleObj->getNamespace();
		$text = addslashes( convertMediaLinks( $old->cur_text ) );

		# Note: the index text is derived from the already-escaped values.
		$ititle = addslashes( indexTitle( $titleObj->getText() ) );
		$itext = addslashes( indexText( $text, $ititle ) );

		$comment = addslashes( $old->cur_comment );
		$restrictions = addslashes( fixUserRights( $old->cur_restrictions ) );
		$userText = addslashes( $old->cur_user_text );
		if ( "" == $userText ) {
			$userText = "Unknown";
		}

		# The old cur_minor_edit column carries two flags: any non-zero
		# value means "minor edit", and the value 2 means "new page".
		$isnew = ( 2 == $old->cur_minor_edit ) ? 1 : 0;
		$isme = ( 0 != $old->cur_minor_edit ) ? 1 : 0;

		$redir = 0;
		if ( preg_match( "/^#redirect/i", $text ) ) {
			$redir = 1;
			$text = fixRedirect( $text );
		}

		wfQuery( $insertHead .
			"({$old->cur_id},{$ns},'{$title}','{$text}'," .
			"'{$comment}',{$old->cur_user},'{$old->cur_timestamp}'," .
			"{$isme},{$isnew},'{$restrictions}',0,'{$ititle}','{$itext}'," .
			"{$redir},'{$userText}')" );

		++$processed;
		if ( 0 == ( $processed % 1000 ) ) {
			print "$processed article records processed.\n";
		}

		# Main-namespace, non-redirect pages containing a comma are "good".
		if ( 0 == $ns && 0 == $redir && false !== strstr( $text, "," ) ) {
			++$countables;
		}
	}
	print "$processed article records processed.\n";
	mysql_free_result( $source );

	wfQuery( "REPLACE INTO site_stats (ss_row_id,ss_total_views," .
		"ss_total_edits,ss_good_articles) VALUES (1,0,0,{$countables})" );

	wfQuery( "UNLOCK TABLES" );
}
190 # Convert May 2002 version of database into new format.
# Copy every revision from old_old into the new old table, one INSERT
# per row.
#
# Titles are split into namespace and DB key via Title. Media links are
# deliberately NOT converted for historical revisions; the only change
# made to the text is fixRedirect() on revisions starting with "#redirect".
function convertOldTable()
{
	$processed = 0;
	print "Converting OLD table.\n";

	wfQuery( "LOCK TABLES old_old READ, old WRITE" );

	$source = wfQuery( "SELECT old_id,old_title,old_text,old_comment,old_user," .
		"old_timestamp,old_minor_edit,old_user_text FROM old_old" );

	wfQuery( "DELETE FROM old" );

	# Column list shared by every INSERT issued below.
	$insertHead = "INSERT INTO old (old_id,old_namespace,old_title," .
		"old_text,old_comment,old_user," .
		"old_timestamp,old_minor_edit,old_user_text) VALUES ";

	while ( $rev = mysql_fetch_object( $source ) ) {
		$titleObj = Title::newFromDBkey( $rev->old_title );
		$title = addslashes( $titleObj->getDBkey() );
		$ns = $titleObj->getNamespace();

		# DO NOT convert media links on old versions!!!!!
		# Old table should always be left intact
		$text = addslashes( $rev->old_text );

		$comment = addslashes( $rev->old_comment );
		$userText = addslashes( $rev->old_user_text );
		if ( "" == $userText ) {
			$userText = "Unknown";
		}

		$isme = ( 0 != $rev->old_minor_edit ) ? 1 : 0;

		if ( preg_match( "/^#redirect/i", $text ) ) {
			$text = fixRedirect( $text );
		}

		wfQuery( $insertHead .
			"({$rev->old_id},{$ns},'{$title}','{$text}'," .
			"'{$comment}',{$rev->old_user},'{$rev->old_timestamp}'," .
			"{$isme},'{$userText}')" );

		++$processed;
		if ( 0 == ( $processed % 1000 ) ) {
			print "$processed history records processed.\n";
		}
	}
	print "$processed history records processed.\n";
	mysql_free_result( $source );

	wfQuery( "UNLOCK TABLES" );
}
# Copy every image file found directly in $wgImageDirectory into the
# hashed layout under $wgUploadDirectory and (re)create its row in the
# image table.
#
# The destination is <upload>/<h>/<hh>/<name>, where <h> and <hh> are the
# first one and two hex digits of md5(<name>) and <name> is the DB key
# Title produces for the file name. Dot-files and files whose extension
# is not png/gif/jpg/jpeg/ogg are skipped. The script dies if a directory
# cannot be created or a file cannot be copied.
function convertImageDirectoriesX()
{
	global $wgImageDirectory, $wgMetaImageDirectory, $wgUploadDirectory;
	$count = 0;
	$exts = array( "png", "gif", "jpg", "jpeg", "ogg" );

	print "Moving image files.\n";
	$dir = opendir( $wgImageDirectory ) or die(
		"Couldn't open directory \"{$wgImageDirectory}\".\n" );

	while ( false !== ( $oname = readdir( $dir ) ) ) {
		# Square-bracket offsets: the {0} form no longer parses on PHP 8.
		if ( "." == $oname[0] ) continue;

		$nt = Title::newFromText( $oname );
		$nname = $nt->getDBkey();

		# Lower-cased extension without the dot, or "" when there is none.
		$ext = strrchr( $nname, "." );
		if ( false === $ext ) { $ext = ""; }
		else { $ext = strtolower( substr( $ext, 1 ) ); }
		if ( ! in_array( $ext, $exts ) ) {
			print "Skipping \"{$oname}\"\n";
			continue;
		}

		# Clear the umask so the 0777 mode passed to mkdir() is applied
		# as given; it is restored once both levels exist.
		$oldumask = umask(0);
		$hash = md5( $nname );
		$dest = $wgUploadDirectory . "/" . $hash[0];
		if ( ! is_dir( $dest ) ) {
			mkdir( $dest, 0777 ) or die( "Can't create \"{$dest}\".\n" );
		}
		$dest .= "/" . substr( $hash, 0, 2 );
		if ( ! is_dir( $dest ) ) {
			mkdir( $dest, 0777 ) or die( "Can't create \"{$dest}\".\n" );
		}
		umask( $oldumask );

		if ( copy( "{$wgImageDirectory}/{$oname}", "{$dest}/{$nname}" ) ) {
			++$count;

			# Replace any existing row for this image name.
			$sql = "DELETE FROM image WHERE img_name='" .
				addslashes( $nname ) . "'";
			$res = wfQuery( $sql );

			$sql = "INSERT INTO image (img_name,img_timestamp,img_user," .
				"img_user_text,img_size,img_description) VALUES ('" .
				addslashes( $nname ) . "','" .
				date( "YmdHis" ) . "',0,'(Automated conversion)','" .
				filesize( "{$dest}/{$nname}" ) . "','')";
			$res = wfQuery( $sql );
		} else {
			die( "Couldn't copy \"{$oname}\" to \"{$nname}\"\n" );
		}
	}
	# Release the directory handle now that the listing is exhausted.
	closedir( $dir );

	print "{$count} images moved.\n";
}
# For every distinct image name referenced in the imagelinks table, copy
# the file into the hashed layout under $wgUploadDirectory and (re)create
# its row in the image table.
#
# The new name is the old one with its first letter upper-cased. The
# source file is searched for, in order, in the hashed and flat forms of
# $wgImageDirectory and then of $wgMetaImageDirectory; the first copy()
# that succeeds wins. Names whose extension is not png/gif/jpg/jpeg/ogg
# are skipped, and a file that cannot be found anywhere is reported but
# does not abort the run.
function convertImageDirectories()
{
	global $wgImageDirectory, $wgMetaImageDirectory, $wgUploadDirectory;
	$count = 0;
	$exts = array( "png", "gif", "jpg", "jpeg", "ogg" );

	$sql = "SELECT DISTINCT il_to FROM imagelinks";
	$result = wfQuery( $sql );

	while ( $row = mysql_fetch_object( $result ) ) {
		$oname = $row->il_to;
		$nname = ucfirst( $oname );

		# Lower-cased extension without the dot, or "" when there is none.
		$ext = strrchr( $nname, "." );
		if ( false === $ext ) { $ext = ""; }
		else { $ext = strtolower( substr( $ext, 1 ) ); }
		if ( ! in_array( $ext, $exts ) ) {
			print "Skipping \"{$oname}\"\n";
			continue;
		}

		# Clear the umask so the 0777 mode passed to mkdir() is applied
		# as given; it is restored once both levels exist.
		$oldumask = umask(0);
		$hash = md5( $nname );
		# Square-bracket offsets: the {0} form no longer parses on PHP 8.
		$bucket = $hash[0] . "/" . substr( $hash, 0, 2 );
		$dest = $wgUploadDirectory . "/" . $hash[0];
		$wgImageDirectoryHash = $wgImageDirectory . "/" . $bucket;
		$wgMetaImageDirectoryHash = $wgMetaImageDirectory . "/" . $bucket;
		if ( ! is_dir( $dest ) ) {
			mkdir( $dest, 0777 ) or die( "Can't create \"{$dest}\".\n" );
		}
		$dest .= "/" . substr( $hash, 0, 2 );
		if ( ! is_dir( $dest ) ) {
			mkdir( $dest, 0777 ) or die( "Can't create \"{$dest}\".\n" );
		}
		umask( $oldumask );

		#echo "Would be copying {$wgImageDirectoryHash}/{$oname} to {$dest}/{$nname}\n";
		#continue;

		# Try each candidate location in turn; evaluation stops at the
		# first copy() that succeeds.
		if ( copy( "{$wgImageDirectoryHash}/{$nname}", "{$dest}/{$nname}" )
		  or copy( "{$wgImageDirectory}/{$oname}", "{$dest}/{$nname}" )
		  or copy( "{$wgImageDirectory}/".strtolower($oname), "{$dest}/{$nname}" )
		  or copy( "{$wgMetaImageDirectoryHash}/{$oname}", "{$dest}/{$nname}" )
		  or copy( "{$wgMetaImageDirectory}/{$oname}", "{$dest}/{$nname}" )
		  or copy( "{$wgMetaImageDirectory}/".strtolower($oname), "{$dest}/{$nname}" ) ) {
			++$count;

			# Replace any existing row for this image name.
			$sql = "DELETE FROM image WHERE img_name='" .
				addslashes( $nname ) . "'";
			$res = wfQuery( $sql );

			$sql = "INSERT INTO image (img_name,img_timestamp,img_user," .
				"img_user_text,img_size,img_description) VALUES ('" .
				addslashes( $nname ) . "','" .
				date( "YmdHis" ) . "',0,'(Automated conversion)','" .
				filesize( "{$dest}/{$nname}" ) . "','')";
			$res = wfQuery( $sql );
		} else {
			echo( "Couldn't copy \"{$oname}\" to \"{$nname}\"\n" );
		}
	}
	# Free the result set, as the table converters in this file do.
	mysql_free_result( $result );

	# Report the tally, mirroring convertImageDirectoriesX().
	print "{$count} images moved.\n";
}
364 # Utility functions for the above.
# Rewrite absolute URLs that point at uploaded files on the Wikipedia
# hosts into internal wiki links.
#
# URLs under /upload/ (optionally with the hashed "x/xy/" prefix) or
# /images/uploads/ that end in a png/jpg/jpeg/gif extension become
# [[<image namespace>:name.ext]]; any remaining upload URL becomes
# [[media:name]]. A URL directly preceded by "[" is left alone.
#
# Prints "BOOF!" when the text was changed and "." otherwise, as a
# progress indicator.
#
# NOTE(review): `Namespace` is presumably a reserved word on PHP 5.3 and
# later, so the Namespace::getImage() call below may only parse on older
# interpreters - confirm before reusing this code.
#
# @param string $text Wiki text to scan.
# @return string The text with matching URLs replaced.
function convertMediaLinks( $text )
{
	global $wgLang;
	$ins = $wgLang->getNsText( Namespace::getImage() );

	# Shared pattern pieces. The dots in the host name are escaped so they
	# match only a literal "."; the capture-group numbering (1 = preceding
	# character, 2 = host prefix, 3 = file name, 4 = extension) is what the
	# replacement strings below rely on.
	$lead = "(^|[^[])http:\\/\\/(www\\.|meta\\.|)wikipedia\\.(?:com|org)";
	$hashed = "\\/upload\\/(?:[0-9a-f]\\/[0-9a-f][0-9a-f]\\/|)";
	$legacy = "\\/images\\/uploads\\/";
	$name = "([a-zA-Z0-9_:.~\\%\\-]+)";
	$imgext = "\\.(png|PNG|jpg|JPG|jpeg|JPEG|gif|GIF)";

	$q = $text;

	# Image files first, so they end up in the image namespace ...
	$text = preg_replace( "/{$lead}{$hashed}{$name}{$imgext}/",
		"\\1[[{$ins}:\\3.\\4]]", $text );
	$text = preg_replace( "/{$lead}{$legacy}{$name}{$imgext}/",
		"\\1[[{$ins}:\\3.\\4]]", $text );

	# ... then everything else that is left becomes a media: link.
	$text = preg_replace( "/{$lead}{$hashed}{$name}/",
		"\\1[[media:\\3]]", $text );
	$text = preg_replace( "/{$lead}{$legacy}{$name}/",
		"\\1[[media:\\3]]", $text );

	if ($q != $text) echo "BOOF!"; else echo ".";
	return $text;
}
# Normalise an old-style redirect line to the "#redirect [[Target]]" form.
#
# Two legacy spellings at the very start of the text are recognised,
# case-insensitively: "#redirect [Target]" (single brackets) and
# "#redirect Target" (no brackets). Only the first occurrence is
# rewritten; text in any other form is returned unchanged.
#
# @param string $text Page text.
# @return string Text with the leading redirect normalised.
function fixRedirect( $text )
{
	# Characters allowed in a redirect target.
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\xA0-\\xff]";
	$re = "#redirect";

	# Detection pattern => rewriting pattern, tried in this order.
	$forms = array(
		"/^{$re}\\s*\\[{$tc}+\\]/i" => "/^({$re})\\s*\\[\\s*({$tc}+)\\]/i",
		"/^{$re}\\s+{$tc}+/i" => "/^({$re})\\s+({$tc}+)/i"
	);

	foreach ( $forms as $probe => $rewrite ) {
		if ( preg_match( $probe, $text ) ) {
			return preg_replace( $rewrite, "\\1 [[\\2]]", $text, 1 );
		}
	}
	return $text;
}
# Translate a user's option string from the old format to the new one.
#
# The input is URL-encoded text holding one "name=value" pair per line.
# The result is a newline-separated list of "name=value" pairs using the
# new option names. quickbar, skin, underline, cols, rows, searchlimit,
# rclimit and rcdays are always emitted; the remaining options appear
# only when they are switched on in the input. Numeric settings outside
# their accepted range fall back to a default.
#
# @param string $in URL-encoded old-format option string.
# @return string New-format option string.
function fixUserOptions( $in )
{
	# Every old key read below, defaulting to "" so that options missing
	# from the input behave as "not set" without undefined-index notices.
	$ops = array(
		"quickBar" => "", "markupNewTopics" => "", "skin" => "",
		"underlineLinks" => "", "hourDiff" => "", "justify" => "",
		"numberHeadings" => "", "hideMinor" => "",
		"rememberPassword" => "", "showHover" => "",
		"cols" => "", "rows" => "", "resultsPerPage" => "",
		"viewRecentChanges" => ""
	);
	foreach ( explode( "\n", urldecode( $in ) ) as $l ) {
		if ( preg_match( "/^([A-Za-z0-9_]+)=(.*)/", $l, $m ) ) {
			$ops[$m[1]] = $m[2];
		}
	}

	$nops = array();

	# Quickbar: "none" turns it off, anything else defaults to left.
	$nops["quickbar"] = ( strtolower( $ops["quickBar"] ) == "none" ) ? 0 : 1;

	if ( $ops["markupNewTopics"] == "inverse" ) {
		$nops["highlightbroken"] = 1;
	}

	# Skins are identified by the first four letters of their old name;
	# unknown names fall back to skin 0.
	$skins = array( "star" => 0, "nost" => 1, "colo" => 2 );
	$sk = substr( strtolower( $ops["skin"] ), 0, 4 );
	$nops["skin"] = isset( $skins[$sk] ) ? $skins[$sk] : 0;

	$u = strtolower( $ops["underlineLinks"] );
	$nops["underline"] = ( "yes" == $u || "on" == $u ) ? 1 : 0;

	# Hour offset: stored only when non-zero and within a day.
	$t = (int) $ops["hourDiff"];
	if ( $t < -23 || $t > 23 ) { $t = 0; }
	if ( 0 != $t ) { $nops["timecorrection"] = $t; }

	# Simple on/off switches: old key => new key. Emitted only when on.
	$flags = array(
		"justify" => "justify",
		"numberHeadings" => "numberheadings",
		"hideMinor" => "hideminor",
		"rememberPassword" => "rememberpassword",
		"showHover" => "hover"
	);
	foreach ( $flags as $old => $new ) {
		$v = strtolower( $ops[$old] );
		if ( "yes" == $v || "on" == $v ) { $nops[$new] = 1; }
	}

	# Bounded numeric settings: old key => (new key, min, max, default).
	# Values are compared as integers; the upper bound on "cols" was
	# previously written as `c > 200` and therefore never applied.
	$ranges = array(
		"cols" => array( "cols", 20, 200, 80 ),
		"rows" => array( "rows", 5, 100, 20 ),
		"resultsPerPage" => array( "searchlimit", 3, 500, 20 ),
		"viewRecentChanges" => array( "rclimit", 10, 1000, 50 )
	);
	foreach ( $ranges as $old => $spec ) {
		$n = (int) $ops[$old];
		if ( $n < $spec[1] || $n > $spec[2] ) { $n = $spec[3]; }
		$nops[$spec[0]] = $n;
	}
	$nops["rcdays"] = 3;

	$a = array();
	foreach ( $nops as $oname => $oval ) {
		$a[] = "$oname=$oval";
	}
	return implode( "\n", $a );
}
# Translate a comma-separated list of old rights names to the new names.
#
# "is_developer" becomes "developer" and "is_sysop" becomes "sysop"
# (matched case-insensitively, ignoring surrounding whitespace). Every
# other entry is dropped.
#
# @param string $in Comma-separated old rights.
# @return string Comma-separated new rights, possibly empty.
function fixUserRights( $in )
{
	$renamed = array(
		"is_developer" => "developer",
		"is_sysop" => "sysop"
	);

	$kept = array();
	foreach ( explode( ",", $in ) as $right ) {
		$key = strtolower( trim( $right ) );
		if ( isset( $renamed[$key] ) ) {
			$kept[] = $renamed[$key];
		}
	}
	return implode( ",", $kept );
}
# Clean up an old user name for the new user table.
#
# Characters outside the permitted set are removed, underscores become
# spaces, surrounding whitespace is trimmed and the first character is
# upper-cased. The result may be the empty string.
#
# @param string $in Old user name.
# @return string Cleaned user name.
function fixUserName( $in )
{
	# Characters a user name may keep.
	$legal = "-,.()' _0-9A-Za-z\\/:\\xA0-\\xFF";

	$stripped = preg_replace( "/[^{$legal}]/", "", $in );
	$spaced = str_replace( "_", " ", $stripped );
	return ucfirst( trim( $spaced ) );
}
# Reduce a page title to the form stored in the search-index title column.
#
# Three passes are applied in order, each substituting a single space:
# runs of characters outside the legal set, then stand-alone single legal
# characters, then runs of whitespace.
#
# @param string $in Page title text.
# @return string Title reduced to indexable words.
function indexTitle( $in )
{
	# Characters that may appear in an indexed title.
	$legal = "A-Za-z_'0-9&#;\\x90-\\xFF\\-";

	$passes = array(
		"/[^{$legal}]+/",
		# "/\\b[{$legal}][{$legal}]\\b/" (two-letter words) is disabled.
		"/\\b[{$legal}]\\b/",
		"/\\s+/"
	);

	$out = $in;
	foreach ( $passes as $pattern ) {
		$out = preg_replace( $pattern, " ", $out );
	}
	return $out;
}
# Reduce page text to the form stored in the search-index text column.
#
# The text is lower-cased and padded with a space on each side, then put
# through a fixed sequence of regex passes: HTML tags are stripped,
# "== heading ==" text is repeated three times, external URLs and
# bracketed external links are removed (keeping any link label), image
# links lose their file extension, a word directly followed by "]]" plus
# letters is indexed both with and without the suffix, characters outside
# the legal search set are blanked, possessives are handled, wiki quote
# markup is removed and single-character words are dropped.
#
# @param string $text   Page text.
# @param string $ititle Indexed title; not used by this function.
# @return string Text reduced to indexable words.
function indexText( $text, $ititle )
{
	global $wgLang;
	$legal = SearchEngine::legalSearchChars() . "&#;";

	# Strip HTML markup
	$out = strtolower( " " . $text . " " );
	$out = preg_replace(
		"/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/", " ", $out );

	# Emphasize headings
	$out = preg_replace(
		"/(^|\\n)\\s*==\\s+([^\\n]+)\\s+==\\s/sD", "\\2 \\2 \\2 ", $out );

	# Strip external URLs
	$urlChars = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
	$schemes = "http|https|ftp|mailto|news|gopher";
	$bareUrl = "/(^|[^\\[])({$schemes}):[{$urlChars}]+([^{$urlChars}]|$)/";
	$out = preg_replace( $bareUrl, "\\1 \\3", $out );

	# Bracketed external links: without a label, then with one.
	$plainLink = "/([^\\[])\\[({$schemes}):[{$urlChars}]+]/";
	$labelLink = "/([^\\[])\\[({$schemes}):[{$urlChars}]+\\s+([^\\]]+)]/";
	$out = preg_replace( $plainLink, "\\1 ", $out );
	$out = preg_replace( $labelLink, "\\1 \\3 ", $out );

	# Internal image links
	$imageNs = $wgLang->getNsText( Namespace::getImage() );
	$imageLink = "/\\[\\[{$imageNs}:([{$urlChars}]+)\\.(gif|png|jpg|jpeg)([^{$urlChars}])/i";
	$out = preg_replace( $imageLink, " \\1 \\3", $out );

	# Handle [[game]]s
	$out = preg_replace( "/([^{$legal}])([{$legal}]+)]]([a-z]+)/",
		"\\1\\2 \\2\\3", $out );

	# Strip all remaining non-search characters
	$out = preg_replace( "/[^{$legal}]+/", " ", $out );

	# Handle 's, s'
	$out = preg_replace( "/([{$legal}]+)'s /", "\\1 \\1's ", $out );
	$out = preg_replace( "/([{$legal}]+)s' /", "\\1s ", $out );

	# Strip wiki '' and '''
	$out = preg_replace( "/''[']*/", " ", $out );

	# Strip 1-letter words (the 2-letter variant is disabled). The same
	# pass runs twice, as in the original sequence.
	$singleChar = "/\\s[{$legal}]\\s/";
	$out = preg_replace( $singleChar, " ", $out );
	$out = preg_replace( $singleChar, " ", $out );

	return $out;
}
# Top up the random table with up to 1000 randomly chosen main-namespace,
# non-redirect titles from cur, and expire the oldest generation.
#
# New rows are inserted with ra_current = 0; every row's ra_current is
# then incremented and rows whose value ends up above 1 are deleted, so
# only the rows inserted by this call survive.
function refillRandom()
{
	# Second argument handed to wfQuery(); it was previously passed without
	# ever being defined. Presumably the caller's name for error reports -
	# confirm against wfQuery().
	$fname = "refillRandom";

	$sql = "INSERT INTO random(ra_current,ra_title) SELECT 0,cur_title " .
		"FROM cur WHERE cur_namespace=0 AND cur_is_redirect=0 " .
		"ORDER BY RAND() LIMIT 1000";
	wfQuery( $sql, $fname );

	$sql = "UPDATE random SET ra_current=(ra_current+1)";
	wfQuery( $sql, $fname );

	$sql = "DELETE FROM random WHERE ra_current>1";
	wfQuery( $sql, $fname );
}
# Move the existing user, cur and old tables out of the way by giving
# each an "old_" prefix, and drop the linked / unlinked tables if they
# exist.
function renameOldTables()
{
	# Current table name => name it is parked under during conversion.
	$renames = array(
		"user" => "old_user",
		"cur" => "old_cur",
		"old" => "old_old"
	);
	foreach ( $renames as $from => $to ) {
		wfQuery( "ALTER TABLE {$from} RENAME TO {$to}" );
	}

	foreach ( array( "linked", "unlinked" ) as $table ) {
		wfQuery( "DROP TABLE IF EXISTS {$table}" );
	}
}
# Drop the "old_"-prefixed tables once the conversion no longer needs
# them. Tables that do not exist are ignored.
function removeOldTables()
{
	$obsolete = array(
		"old_user",
		"old_linked",
		"old_unlinked",
		"old_cur",
		"old_old"
	);
	foreach ( $obsolete as $table ) {
		wfQuery( "DROP TABLE IF EXISTS {$table}" );
	}
}