# Import data from a UseModWiki into a PediaWiki wiki
# 2003-02-09 Brion VIBBER <brion@pobox.com>
# Based loosely on Magnus's code from 2001-2002
#
# Updated limited version to get something working temporarily
#
# Be sure to run the link & index rebuilding scripts!
# Script-wide configuration and state, read by the functions below via `global`.

# Root of the UseModWiki database directory to import from.
$wgRootDirectory = "/Users/brion/src/wiki/convert/wiki-fy/lib-http/db/wiki";
$wgFieldSeparator = "\xb3"; # Some wikis may use different char
$FS = $wgFieldSeparator;

# Nested separators used by fetchPage() and fetchKeptPages().
# NOTE(review): these were not initialised anywhere in the visible file;
# UseModWiki derives them as the base separator followed by a digit --
# confirm against the wiki being imported.
$FS1 = $FS . "1";
$FS2 = $FS . "2";
$FS3 = $FS . "3";

$conversiontime = wfTimestampNow(); # Conversions will be marked with this timestamp

# Name => user id map consulted by checkUserCache(); empty until user
# accounts are imported.
$usercache = array();
# ------------------------------------------------------------------------------
/**
 * Import every page directory under "$wgRootDirectory/page".
 *
 * The page store is split into one sub-directory per initial letter
 * ('A'..'Z') plus 'other'; each existing one is handed to
 * importPageDirectory().
 */
function importPages()
{
	global $wgRootDirectory;

	$letters = array(
		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
		'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
		'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'other' );
	foreach( $letters as $letter ) {
		$dir = "$wgRootDirectory/page/$letter";
		# A wiki with no page under a given letter has no such directory;
		# skip it instead of letting opendir() fail further down.
		if( is_dir( $dir ) ) {
			importPageDirectory( $dir );
		}
	}
}
/**
 * Import every "*.db" page file found in $dir, recursing into
 * sub-directories.
 *
 * @param string $dir    Directory to scan.
 * @param string $prefix Title prefix for pages found in $dir; each level of
 *                       recursion appends "<subdir>/" to it.
 *
 * Output (progress comments and the SQL returned by importPage()) is echoed.
 */
function importPageDirectory( $dir, $prefix = "" )
{
	echo "\n-- Checking page directory $dir\n";
	$mydir = opendir( $dir );
	if( $mydir === false ) {
		echo "-- Couldn't open directory '$dir'. Skipping.\n";
		return;
	}

	# Compare against false explicitly: an entry literally named "0" is
	# falsy and would otherwise end the loop early.
	while( ( $entry = readdir( $mydir ) ) !== false ) {
		if( $entry == '.' || $entry == '..' ) {
			continue;
		}
		if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
			echo importPage( $prefix . $m[1] );
		} elseif( is_dir( "$dir/$entry" ) ) {
			# Keep the prefix accumulated so far so that titles nested more
			# than one level deep are not truncated.
			importPageDirectory( "$dir/$entry", $prefix . "$entry/" );
		} else {
			echo "-- File '$entry' doesn't seem to contain an article. Skipping.\n";
		}
	}
	closedir( $mydir );
}
# ------------------------------------------------------------------------------
/* fetch_ functions
	Grab a given item from the database
	*/

/**
 * Placeholder for loading a UseModWiki user record.
 *
 * Not implemented: the function terminates the script on entry, so nothing
 * below the die() call ever runs.
 *
 * @param mixed $uid UseMod user id (currently unused).
 */
function fetchUser( $uid )
{
	die ("fetchUser not implemented" );

	# ---- Unreachable draft, kept for reference ----
	# NOTE(review): $title is never defined here, the path points at the page
	# directory rather than a user directory, and splitHash() is called
	# without its separator argument. All of this needs rework before the
	# die() above can be removed.
	global $FS,$FS2,$FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/page/" . $title;
	if( !file_exists( $fname ) ) return false;

	$data = splitHash( implode( "", file( $fname ) ) );

	# NOTE(review): the return was not visible in the extracted source;
	# presumably the parsed record is handed back -- confirm.
	return $data;
}
/**
 * Map a page title to its path relative to the page/keep directories.
 *
 * Titles starting with an ASCII capital letter live under a directory named
 * after that letter; everything else lives under "other". This matches the
 * directory list walked by importPages().
 *
 * @param string $title Page title.
 * @return string "<Letter>/<title>" or "other/<title>" (no extension).
 */
function useModFilename( $title ) {
	$c = substr( $title, 0, 1 );
	if( preg_match( '/[A-Z]/', $c ) ) {
		# NOTE(review): this return was missing from the extracted source;
		# restored from the 'A'..'Z' directory layout -- confirm.
		return "$c/$title";
	}
	return "other/$title";
}
/**
 * Load the current revision of a page from its ".db" file.
 *
 * The file is split on $FS1 into page fields, the "text_default" field is
 * split on $FS2 into section fields, and the section's "data" field is split
 * on $FS3 into text fields.
 *
 * @param string $title Page title.
 * @return object Object with the properties text, summary, minor, ts,
 *                username and host (null where the file lacks the field).
 *
 * Terminates the script if the page file does not exist.
 */
function fetchPage( $title )
{
	global $FS1, $FS2, $FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";
	if( !file_exists( $fname ) ) {
		die( "Couldn't open file '$fname' for page '$title'.\n" );
	}

	$page = splitHash( $FS1, file_get_contents( $fname ) );
	# Fall back to an empty string so a damaged file yields empty fields
	# instead of undefined-index notices.
	$section = splitHash( $FS2, isset( $page["text_default"] ) ? $page["text_default"] : "" );
	$text = splitHash( $FS3, isset( $section["data"] ) ? $section["data"] : "" );

	$fields = array();
	foreach( array( "text", "summary", "minor" ) as $key ) {
		$fields[$key] = isset( $text[$key] ) ? $text[$key] : null;
	}
	foreach( array( "ts", "username", "host" ) as $key ) {
		$fields[$key] = isset( $section[$key] ) ? $section[$key] : null;
	}
	return array2object( $fields );
}
/**
 * Load the kept (historical) revisions of a page from its ".kp" file.
 *
 * The file is split on $FS1 into revisions; the first chunk is discarded.
 * Each remaining revision is split on $FS2 into section fields and its
 * "data" field on $FS3 into text fields. Revisions with empty text, an empty
 * "minor" field or a non-positive timestamp are skipped with a message.
 *
 * @param string $title Page title.
 * @return array List of objects with the properties text, summary, minor,
 *               ts, username and host; empty if there is no ".kp" file.
 */
function fetchKeptPages( $title )
{
	global $FS1, $FS2, $FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";
	if( !file_exists( $fname ) ) return array();

	$keptlist = explode( $FS1, file_get_contents( $fname ) );
	array_shift( $keptlist ); # Drop the junk at beginning of file

	$revisions = array();
	foreach( $keptlist as $rev ) {
		$section = splitHash( $FS2, $rev );
		$text = splitHash( $FS3, isset( $section["data"] ) ? $section["data"] : "" );

		$usable = !empty( $text["text"] )
			&& isset( $text["minor"] ) && $text["minor"] != ""
			# Cast rather than multiply: arithmetic on a non-numeric string
			# is an error on current PHP.
			&& isset( $section["ts"] ) && (int)$section["ts"] > 0;

		if( $usable ) {
			$revisions[] = array2object( array(
				"text" => $text["text"],
				"summary" => isset( $text["summary"] ) ? $text["summary"] : null,
				"minor" => $text["minor"],
				"ts" => $section["ts"],
				"username" => isset( $section["username"] ) ? $section["username"] : null,
				"host" => isset( $section["host"] ) ? $section["host"] : null ) );
		} else {
			echo "-- skipped a bad old revision\n";
		}
	}
	return $revisions;
}
/**
 * Split a "key SEP value SEP key SEP value ..." string into an array.
 *
 * @param string $sep Separator between consecutive keys and values.
 * @param string $str Flattened key/value string.
 * @return array Map of key => value. A trailing key with no value is
 *               ignored; a string with fewer than two pieces yields array().
 */
function splitHash ( $sep , $str ) {
	$temp = explode ( $sep , $str ) ;
	$ret = array () ;
	$n = count ( $temp ) ;
	# Walk the pieces two at a time: even index is the key, odd the value.
	for ( $i = 0 ; $i + 1 < $n ; $i += 2 ) {
		$ret[$temp[$i]] = $temp[$i + 1] ;
	}
	return $ret ;
}
/* import_ functions
	Take a fetched item and produce SQL
	*/

/* importUser
	$uid is the UseMod user id number.
	The new ones will be assigned arbitrarily and are for internal use only.

	THIS IS DELAYED SINCE PUBLIC DUMPS DONT INCLUDE USER DIR

	Not implemented: the function terminates the script on entry, so the
	draft below the die() call never runs.
	*/
function importUser( $uid )
{
	global $last_uid, $user_list, $wgTimestampCorrection;
	die("importUser NYI");

	# ---- Unreachable draft, kept for reference ----
	# NOTE(review): $stuff is read both as an object (->username) and as an
	# array (['tzoffset']), md5hash() is not defined in this file, and the
	# draft never advances $last_uid in the visible source. Parts of the
	# original statement text were missing from the extraction and have been
	# closed off minimally so the file parses.
	$stuff = fetchUser( $uid );

	$name = wfStrencode( $stuff->username );
	$hash = md5hash( $stuff->password ); # Doable?
	$tzoffset = $stuff['tzoffset'] - ($wgTimestampCorrection / 3600); # -8 to 0; +9 to +1
	$hideminor = ($stuff['rcall'] ? 0 : 1);
	$options = "cols={$stuff['editcols']}
rows={$stuff['editrows']}
rcdays={$stuff['rcdays']}
timecorrection={$tzoffset}
hideminor={$hideminor}
";

	$sql = "INSERT
	INTO user (user_id,user_name,user_password,user_options)
	VALUES ({$last_uid},'{$name}','{$hash}','{$options}');\n";
	return $sql;
}
/**
 * Resolve the author of a revision to a (user id, escaped name) pair.
 *
 * @param string $name User name recorded on the revision; may be empty.
 * @param string $host Host recorded on the revision; used as the display
 *                     name when $name is empty.
 * @return array array( $userid, $username ), where $userid is the id stored
 *               in the global $usercache under $name (0 when absent) and
 *               $username has been passed through wfStrencode().
 */
function checkUserCache( $name, $host )
{
	global $usercache;

	if( (string)$name !== "" ) {
		# Look the name up as a KEY. in_array() searches the values, which
		# never matched the $usercache[$name] read that followed it.
		if( is_array( $usercache ) && isset( $usercache[$name] ) ) {
			$userid = $usercache[$name];
		} else {
			# If we haven't imported user accounts
			$userid = 0;
		}
		$username = wfStrencode( $name );
	} else {
		$userid = 0;
		$username = wfStrencode( $host );
	}
	return array( $userid, $username );
}
/**
 * Build the SQL that imports one page: an INSERT into `cur` for the current
 * revision and, when kept revisions exist, one multi-row INSERT into `old`.
 *
 * @param string $title UseMod page title.
 * @return string SQL text (the caller echoes it).
 *
 * Also echoes a "-- Importing page" progress comment.
 */
function importPage( $title )
{
	global $conversiontime;

	echo "\n-- Importing page $title\n";
	$page = fetchPage( $title );

	$newtitle = wfStrencode( recodeText( $title ) );
	# NOTE(review): the assignment of $namespace was missing from the
	# extracted source; 0 is assumed here -- confirm.
	$namespace = 0;

	# Current revision:
	$text = wfStrencode( recodeText( $page->text ) );
	# The edit summary is the comment; $comment was previously unset when the
	# cur row was built.
	$comment = wfStrencode( recodeText( $page->summary ) );
	$minor = ($page->minor ? 1 : 0);
	list( $userid, $username ) = checkUserCache( $page->username, $page->host );
	# checkUserCache() already escaped the name; only recode it, so it is
	# converted like every other text column without being escaped twice.
	$username = recodeText( $username );
	$timestamp = wfUnix2Timestamp( $page->ts );
	$redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 );
	$random = mt_rand() / mt_getrandmax();
	$inverse = wfInvertTimestamp( $timestamp );
	$sql = "
INSERT
	INTO cur (cur_namespace,cur_title,cur_text,cur_comment,cur_user,cur_user_text,cur_timestamp,inverse_timestamp,cur_touched,cur_minor_edit,cur_is_redirect,cur_random) VALUES
	($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse','$conversiontime',$minor,$redirect,$random);\n";

	# History
	$revisions = fetchKeptPages( $title );
	if( count( $revisions ) == 0 ) {
		return $sql;
	}

	$rows = array();
	foreach( $revisions as $rev ) {
		$text = wfStrencode( recodeText( $rev->text ) );
		$minor = ($rev->minor ? 1 : 0);
		list( $userid, $username ) = checkUserCache( $rev->username, $rev->host );
		$username = recodeText( $username );
		$timestamp = wfUnix2Timestamp( $rev->ts );
		$inverse = wfInvertTimestamp( $timestamp );
		# Use the revision's summary, not its full text, as the comment.
		$comment = wfStrencode( recodeText( $rev->summary ) );

		$rows[] = "\n\t($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse',$minor)";
	}

	$sql .= "INSERT
	INTO old (old_namespace,old_title,old_text,old_comment,old_user,old_user_text,old_timestamp,inverse_timestamp,old_minor_edit) VALUES\n";
	# NOTE(review): the statement terminator was missing from the extracted
	# source; ";\n\n" closes the multi-row INSERT.
	$sql .= implode( ",", $rows ) . ";\n\n";
	return $sql;
}
/**
 * Normalise line endings and convert text from Latin-1 to UTF-8.
 *
 * @param string $string Text as stored by the source wiki.
 * @return string Text with "\r\n" replaced by "\n", encoded as UTF-8.
 */
function recodeText( $string ) {
	# For currently latin-1 wikis
	$string = str_replace( "\r\n", "\n", (string)$string );
	# return iconv( "CP1252", "UTF-8", $string );
	# utf8_encode() is deprecated on recent PHP; mb_convert_encoding() does
	# the same ISO-8859-1 to UTF-8 conversion where mbstring is available.
	if( function_exists( 'mb_convert_encoding' ) ) {
		return mb_convert_encoding( $string, 'UTF-8', 'ISO-8859-1' );
	}
	return utf8_encode( $string );
}
/**
 * Escape a string for inclusion inside a quoted MySQL string literal.
 *
 * @param string $string Raw value.
 * @return string Escaped value (without surrounding quotes).
 */
function wfStrencode( $string ) {
	if( function_exists( 'mysql_escape_string' ) ) {
		return mysql_escape_string( $string );
	}
	# The mysql extension is gone from current PHP. Apply the same escapes by
	# hand; strtr() with an array makes a single pass, so the backslashes it
	# inserts are never escaped again.
	return strtr( (string)$string, array(
		"\x00" => "\\0",
		"\n"   => "\\n",
		"\r"   => "\\r",
		"\\"   => "\\\\",
		"'"    => "\\'",
		"\""   => "\\\"",
		"\x1a" => "\\Z"
	) );
}
/**
 * Format a Unix timestamp as a 14-digit "YYYYMMDDHHMMSS" string in GMT.
 *
 * @param int|string $unixtime Seconds since the epoch; numeric strings as
 *                             read from the page files are accepted.
 * @return string
 */
function wfUnix2Timestamp( $unixtime ) {
	# Values parsed from the .db/.kp files arrive as strings.
	return gmdate( "YmdHis", (int)$unixtime );
}
/**
 * Convert a 14-digit "YYYYMMDDHHMMSS" GMT timestamp to a Unix timestamp.
 *
 * @param string $ts Timestamp in the format produced by wfUnix2Timestamp().
 * @return int Seconds since the epoch.
 */
function wfTimestamp2Unix( $ts )
{
	$year   = (int)substr( $ts, 0, 4 );
	$month  = (int)substr( $ts, 4, 2 );
	$day    = (int)substr( $ts, 6, 2 );
	$hour   = (int)substr( $ts, 8, 2 );
	$minute = (int)substr( $ts, 10, 2 );
	$second = (int)substr( $ts, 12, 2 );

	return gmmktime( $hour, $minute, $second, $month, $day, $year );
}
/**
 * Current time as a 14-digit "YYYYMMDDHHMMSS" string in GMT.
 *
 * @return string
 */
function wfTimestampNow() {
	return gmdate( "YmdHis" );
}
# Sorting hack for MySQL 3, which doesn't use index sorts for DESC
#
# NOTE(review): the body of this function was missing from the extracted
# source. It is restored here as a per-digit complement (0<->9, 1<->8, ...),
# which makes ascending order of the result equal descending order of the
# input -- confirm against the consumer of the inverse_timestamp column.
function wfInvertTimestamp( $ts ) {
	return strtr(
		$ts,
		"0123456789",
		"9876543210"
	);
}
/**
 * Seed the Mersenne Twister generator used by mt_rand().
 *
 * The seed is taken from the last eight hex digits of md5(microtime()),
 * masked to 31 bits. Sets the global $wgRandomSeeded to true afterwards.
 */
function wfSeedRandom()
{
	# Declared global so the flag is visible to callers; as a plain local it
	# was discarded when the function returned.
	global $wgRandomSeeded;

	$seed = hexdec(substr(md5(microtime()),-8)) & 0x7fffffff;
	# NOTE(review): the call that consumes $seed was missing from the
	# extracted source; mt_srand() matches the mt_rand() use in importPage().
	mt_srand( $seed );
	$wgRandomSeeded = true;
}
297 function array2object( $arr ) {
299 foreach( $arr as $x => $y ) {