rearranging InitialiseMessages code
[mediawiki.git] / maintenance / importUseModWiki.php
blobee6f3717a64476c44ac30b5f99014809168bee59
1 <?php
/*
	Import data from a UseModWiki into a PediaWiki wiki
	2003-02-09 Brion VIBBER <brion@pobox.com>
	Based loosely on Magnus's code from 2001-2002

	Updated limited version to get something working temporarily
	2003-10-09
	Be sure to run the link & index rebuilding scripts!
*/
/* globals */

# Root of the UseModWiki database tree; pages are read from its "page"
# subdirectory and kept revisions from its "keep" subdirectory.
$wgRootDirectory = "/Users/brion/src/wiki/convert/wiki-fy/lib-http/db/wiki";

# Field separator byte. The numbered variants are this byte followed by
# the digit 1, 2 or 3.
$wgFieldSeparator = "\xb3"; # Some wikis may use different char
$FS = $wgFieldSeparator;
$FS1 = "{$FS}1";
$FS2 = "{$FS}2";
$FS3 = "{$FS}3";

$conversiontime = wfTimestampNow(); # Conversions will be marked with this timestamp
$usercache = array();

# Seed the random number generator, then walk the page directories.
wfSeedRandom();
importPages();
28 # ------------------------------------------------------------------------------
/*
	Visit each per-letter page directory ('A' to 'Z', then 'other') under
	$wgRootDirectory/page and hand every one that exists to
	importPageDirectory().
*/
function importPages()
{
	global $wgRootDirectory;

	$buckets = range( 'A', 'Z' );
	$buckets[] = 'other';

	foreach( $buckets as $bucket ) {
		$path = "$wgRootDirectory/page/$bucket";
		if( !is_dir( $path ) ) {
			# Nothing filed under this bucket
			continue;
		}
		importPageDirectory( $path );
	}
}
/*
	Import every page database file found in a directory.

	Entries named NAME.db are passed to importPage() as $prefix . NAME and
	the string it returns is echoed. Subdirectories (other than '.' and
	'..') are scanned recursively with their name appended to the prefix.
	Any other entry is reported as skipped.

	$dir    - directory to scan
	$prefix - string prepended to every title found in $dir
*/
function importPageDirectory( $dir, $prefix = "" )
{
	echo "\n-- Checking page directory $dir\n";
	$handle = opendir( $dir );
	if( $handle === false ) {
		echo "-- Couldn't open directory '$dir'. Skipping.\n";
		return;
	}

	# Compare against false explicitly: an entry named "0" is falsy and
	# would otherwise end the scan early.
	while( ( $entry = readdir( $handle ) ) !== false ) {
		if( $entry == '.' || $entry == '..' ) {
			continue;
		}
		if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
			echo importPage( $prefix . $m[1] );
		} elseif( is_dir( "$dir/$entry" ) ) {
			# Carry the accumulated prefix so nested directories keep the
			# full title path.
			importPageDirectory( "$dir/$entry", "$prefix$entry/" );
		} else {
			echo "-- File '$entry' doesn't seem to contain an article. Skipping.\n";
		}
	}
	closedir( $handle );
}
65 # ------------------------------------------------------------------------------
/* fetch_ functions
	Grab a given item from the database
	*/
/*
	Placeholder for loading a UseMod user record. Not implemented: calling
	it terminates the script with an error message.

	No code follows the die() because nothing after it can run; a real
	implementation has to be written from scratch.

	$uid - UseMod user id (currently unused)
*/
function fetchUser( $uid )
{
	die ("fetchUser not implemented" );
}
/*
	Build the relative path (without extension) for a page title.

	Titles whose first byte is an uppercase ASCII letter are placed under a
	directory named after that letter; all other titles go under "other".

	$title - page title
	Returns "LETTER/title" or "other/title".
*/
function useModFilename( $title ) {
	$initial = substr( $title, 0, 1 );
	$bucket = preg_match( '/[A-Z]/', $initial ) ? $initial : "other";
	return $bucket . "/" . $title;
}
/*
	Read the current revision of a page from its .db file.

	The file is split on $FS1 into page fields, the "text_default" field is
	split on $FS2 into section fields, and the section's "data" field is
	split on $FS3 into text fields.

	$title - page title; the file path is derived with useModFilename()
	Returns an object with the properties text, summary, minor, ts,
	username and host. Terminates the script if the file does not exist.
*/
function fetchPage( $title )
{
	global $FS,$FS1,$FS2,$FS3, $wgRootDirectory;

	$fname = "$wgRootDirectory/page/" . useModFilename( $title ) . ".db";
	if( file_exists( $fname ) == false ) {
		die( "Couldn't open file '$fname' for page '$title'.\n" );
	}

	$raw = file_get_contents( $fname );
	$page = splitHash( $FS1, $raw );
	$section = splitHash( $FS2, $page["text_default"] );
	$text = splitHash( $FS3, $section["data"] );

	$fields = array();
	$fields["text"] = $text["text"];
	$fields["summary"] = $text["summary"];
	$fields["minor"] = $text["minor"];
	$fields["ts"] = $section["ts"];
	$fields["username"] = $section["username"];
	$fields["host"] = $section["host"];

	return array2object( $fields );
}
/*
	Read the kept (old) revisions of a page from its .kp file.

	The file content is split on $FS1; the first chunk is discarded. Each
	remaining chunk is split on $FS2 into section fields, and the section's
	"data" field is split on $FS3 into text fields. A revision is kept only
	if its text is truthy, its minor flag is not the empty string and its
	timestamp is greater than zero; otherwise a notice is echoed.

	$title - page title; the file path is derived with useModFilename()
	Returns an array of objects with the properties text, summary, minor,
	ts, username and host; empty if the file does not exist.
*/
function fetchKeptPages( $title )
{
	global $FS,$FS1,$FS2,$FS3, $wgRootDirectory, $wgTimezoneCorrection;

	$fname = "$wgRootDirectory/keep/" . useModFilename( $title ) . ".kp";
	if( !file_exists( $fname ) ) {
		return array();
	}

	$chunks = explode( $FS1, file_get_contents( $fname ) );
	array_shift( $chunks ); # Drop the junk at beginning of file

	$revisions = array();
	foreach( $chunks as $chunk ) {
		$section = splitHash( $FS2, $chunk );
		$text = splitHash( $FS3, $section["data"] );

		$usable = $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 );
		if( !$usable ) {
			echo "-- skipped a bad old revision\n";
			continue;
		}

		$fields = array();
		$fields["text"] = $text["text"];
		$fields["summary"] = $text["summary"];
		$fields["minor"] = $text["minor"];
		$fields["ts"] = $section["ts"];
		$fields["username"] = $section["username"];
		$fields["host"] = $section["host"];
		$revisions[] = array2object( $fields );
	}
	return $revisions;
}
/*
	Turn a separator-delimited "key SEP value SEP key SEP value ..." string
	into an associative array.

	$sep - separator string passed to explode()
	$str - string to split
	Returns the array of key => value pairs. A trailing key without a value
	is dropped; when a key repeats, the last value wins.
*/
function splitHash ( $sep , $str ) {
	$parts = explode( $sep, $str );
	$total = count( $parts );
	$hash = array();
	# Step through the list two items at a time: key first, then its value
	for( $k = 0; $k + 1 < $total; $k += 2 ) {
		$hash[$parts[$k]] = $parts[$k + 1];
	}
	return $hash;
}
/* import_ functions
	Take a fetched item and produce SQL
	*/
/* importUser
	$uid is the UseMod user id number.
	The new ones will be assigned arbitrarily and are for internal use only.

	THIS IS DELAYED SINCE PUBLIC DUMPS DON'T INCLUDE USER DIR

	Not yet implemented: calling this terminates the script. The intended
	result is an INSERT statement for the user table (user_id, user_name,
	user_password, user_options), with the options string carrying the
	cols, rows, rcdays, timecorrection and hideminor settings of the
	record loaded through fetchUser().
*/
function importUser( $uid )
{
	die("importUser NYI");
}
/*
	Resolve the user id and SQL-escaped display name for an edit.

	$name - user name recorded for the edit; may be empty
	$host - host recorded for the edit, used as the name when $name is empty
	Returns array( $userid, $username ). $userid is the id stored in
	$usercache under $name, or 0 when the name is not cached or no name was
	given. $username has already been passed through wfStrencode().
*/
function checkUserCache( $name, $host )
{
	global $usercache;

	# Only a missing/empty name counts as anonymous; a user called "0" is
	# still a named user.
	if( (string)$name === '' ) {
		return array( 0, wfStrencode( $host ) );
	}

	# The cache is read by name, so test the key. in_array() would search
	# the stored ids instead.
	if( is_array( $usercache ) && isset( $usercache[$name] ) ) {
		$userid = $usercache[$name];
	} else {
		# If we haven't imported user accounts
		$userid = 0;
	}
	return array( $userid, wfStrencode( $name ) );
}
/*
	Produce the SQL that imports one page and its kept revisions.

	The current revision read by fetchPage() becomes an INSERT into the cur
	table. If fetchKeptPages() returns any revisions, a single multi-row
	INSERT into the old table is appended. Text, summaries, titles and user
	names are recoded with recodeText() and escaped for SQL.

	$title - UseMod page title
	Returns the SQL text as a string. A progress line is echoed first.
*/
function importPage( $title )
{
	global $usercache;
	global $conversiontime;

	echo "\n-- Importing page $title\n";
	$page = fetchPage( $title );

	$newtitle = wfStrencode( recodeText( $title ) );
	$namespace = 0;

	# Current revision:
	$text = wfStrencode( recodeText( $page->text ) );
	# The edit summary is the comment; it has to be set before the INSERT
	# below interpolates it.
	$comment = wfStrencode( recodeText( $page->summary ) );
	$minor = ($page->minor ? 1 : 0);
	list( $userid, $username ) = checkUserCache( $page->username, $page->host );
	# checkUserCache() has already escaped the name; only recode it, a second
	# wfStrencode() would double the backslashes.
	$username = recodeText( $username );
	$timestamp = wfUnix2Timestamp( $page->ts );
	$redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 );
	$random = mt_rand() / mt_getrandmax();
	$inverse = wfInvertTimestamp( $timestamp );
	$sql = "
INSERT
	INTO cur (cur_namespace,cur_title,cur_text,cur_comment,cur_user,cur_user_text,cur_timestamp,inverse_timestamp,cur_touched,cur_minor_edit,cur_is_redirect,cur_random) VALUES
	($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse','$conversiontime',$minor,$redirect,$random);\n";

	# History
	$revisions = fetchKeptPages( $title );
	if(count( $revisions ) == 0 ) {
		return $sql;
	}

	$any = false;
	$sql .= "INSERT
	INTO old (old_namespace,old_title,old_text,old_comment,old_user,old_user_text,old_timestamp,inverse_timestamp,old_minor_edit) VALUES\n";
	foreach( $revisions as $rev ) {
		$text = wfStrencode( recodeText( $rev->text ) );
		$minor = ($rev->minor ? 1 : 0);
		list( $userid, $username ) = checkUserCache( $rev->username, $rev->host );
		# Already escaped by checkUserCache(); see the current revision above
		$username = recodeText( $username );
		$timestamp = wfUnix2Timestamp( $rev->ts );
		$inverse = wfInvertTimestamp( $timestamp );
		# The comment is the revision's summary, not a second copy of its text
		$comment = wfStrencode( recodeText( $rev->summary ) );

		if($any) $sql .= ",";
		$sql .= "\n\t($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse',$minor)";
		$any = true;
	}
	$sql .= ";\n\n";
	return $sql;
}
251 # Whee!
/*
	Normalise line endings and recode text for the import.

	CRLF pairs are replaced by a single LF, then the result is passed
	through utf8_encode(). For currently latin-1 wikis; an
	iconv( "CP1252", "UTF-8", ... ) conversion is the noted alternative.

	$string - text to convert
	Returns the converted text.
*/
function recodeText( $string ) {
	$unixLines = str_replace( "\r\n", "\n", $string );
	$recoded = utf8_encode( $unixLines );
	return $recoded;
}
/*
	Escape a string for use inside a quoted MySQL string literal.

	Escapes the same bytes as the legacy mysql_escape_string() (NUL, LF, CR,
	backslash, single quote, double quote and Ctrl-Z) without depending on
	the mysql extension, which current PHP versions no longer ship.

	$string - value to escape; it is cast to string first
	Returns the escaped string.
*/
function wfStrencode( $string ) {
	static $escapes = array(
		"\x00" => "\\0",
		"\n"   => "\\n",
		"\r"   => "\\r",
		"\\"   => "\\\\",
		"'"    => "\\'",
		"\""   => "\\\"",
		"\x1a" => "\\Z"
	);
	# strtr() with a map replaces in one pass, so the backslashes it
	# inserts are never escaped again.
	return strtr( (string)$string, $escapes );
}
/*
	Format a Unix timestamp as a 14-digit "YmdHis" string in GMT.

	$unixtime - seconds since the Unix epoch
	Returns the formatted timestamp string.
*/
function wfUnix2Timestamp( $unixtime ) {
	$format = "YmdHis";
	$stamp = gmdate( $format, $unixtime );
	return $stamp;
}
/*
	Convert a "YmdHis" timestamp string to a Unix timestamp, reading the
	fields as GMT.

	$ts - 14-character timestamp (year, month, day, hour, minute, second)
	Returns the value computed by gmmktime().
*/
function wfTimestamp2Unix( $ts )
{
	$year   = (int)substr( $ts, 0, 4 );
	$month  = (int)substr( $ts, 4, 2 );
	$day    = (int)substr( $ts, 6, 2 );
	$hour   = (int)substr( $ts, 8, 2 );
	$minute = (int)substr( $ts, 10, 2 );
	$second = (int)substr( $ts, 12, 2 );

	return gmmktime( $hour, $minute, $second, $month, $day, $year );
}
/*
	Return the current time as a 14-digit "YmdHis" string in GMT.
*/
function wfTimestampNow() {
	$now = time();
	return gmdate( "YmdHis", $now );
}
/*
	Sorting hack for MySQL 3, which doesn't use index sorts for DESC.

	Replaces every decimal digit d in the timestamp with 9 - d; all other
	characters are left untouched.

	$ts - timestamp string
	Returns the digit-inverted string.
*/
function wfInvertTimestamp( $ts ) {
	$digits = "0123456789";
	$inverted = strrev( $digits );
	return strtr( $ts, $digits, $inverted );
}
/*
	Seed the Mersenne Twister generator once per run.

	The seed is the low 31 bits of the last eight hex digits of
	md5( microtime() ). The global $wgRandomSeeded is set to true
	afterwards, and a call made while it is already truthy does nothing,
	so repeated calls do not reseed.
*/
function wfSeedRandom()
{
	# Must be declared global; without this the assignment below would only
	# create a local variable and the flag would never be recorded.
	global $wgRandomSeeded;

	if( $wgRandomSeeded ) {
		return;
	}

	$seed = hexdec(substr(md5(microtime()),-8)) & 0x7fffffff;
	mt_srand( $seed );
	$wgRandomSeeded = true;
}
/*
	Copy the entries of an associative array onto a fresh object, one
	property per key.

	$arr - array of property name => value
	Returns a stdClass object holding exactly those properties.
*/
function array2object( $arr ) {
	# Start from an empty object: casting a scalar with (object)0 would add
	# a stray "scalar" property to every result.
	$o = new stdClass;
	foreach( $arr as $x => $y ) {
		$o->$x = $y;
	}
	return $o;
}