simple script to generate phpdocumentor documentation and troubleshoot its generation
[mediawiki.git] / maintenance / importUseModWiki.php
blobc0a4b24df5e08e62ccb61acdf48b3862e607f538
1 <?php
/**
 * Import data from a UseModWiki into a PediaWiki wiki
 * 2003-02-09 Brion VIBBER <brion@pobox.com>
 * Based loosely on Magnus's code from 2001-2002
 *
 * Updated limited version to get something working temporarily
 * 2003-10-09
 * Be sure to run the link & index rebuilding scripts!
 *
 * Some more munging for charsets etc
 * 2003-11-28
 *
 * @todo document
 * @package MediaWiki
 * @subpackage Maintenance
 */
/** Set these correctly! */
$wgImportEncoding = "CP1252"; /* Source charset; everything is converted to UTF-8 */
$wgRootDirectory = "/home/usemod/wiki-ia/lib-http/db/wiki";

/* globals */
$wgFieldSeparator = "\xb3"; # Some wikis may use a different char
$FS = $wgFieldSeparator;
# Nested separators: the base separator followed by the nesting level digit
$FS1 = "{$FS}1";
$FS2 = "{$FS}2";
$FS3 = "{$FS}3";

# Every converted row is stamped with this single timestamp
$conversiontime = wfTimestampNow();
$usercache = array();

# Run the import: seed the RNG, then walk all page directories
wfSeedRandom();
importPages();
37 # ------------------------------------------------------------------------------
/**
 * Import every page directory found under "$wgRootDirectory/page".
 *
 * Checks the subdirectories 'A' through 'Z' plus 'other', and hands each
 * one that exists to importPageDirectory().
 */
function importPages()
{
	global $wgRootDirectory;

	$buckets = range( 'A', 'Z' );
	$buckets[] = 'other';

	foreach( $buckets as $bucket ) {
		$path = "$wgRootDirectory/page/$bucket";
		if( !is_dir( $path ) ) {
			# Missing bucket directories are silently skipped
			continue;
		}
		importPageDirectory( $path );
	}
}
/**
 * Import every "*.db" page file in a directory, recursing into subdirectories.
 *
 * Output (the SQL returned by importPage() plus "-- " prefixed progress
 * lines) is written with echo.
 *
 * @param string $dir    Directory to scan
 * @param string $prefix Title prefix prepended to each page name found in $dir;
 *                       subdirectories are scanned with "<subdirectory>/" as prefix
 */
function importPageDirectory( $dir, $prefix = "" )
{
	echo "\n-- Checking page directory $dir\n";
	$mydir = opendir( $dir );
	if( $mydir === false ) {
		# Report as an SQL comment and carry on with the other directories
		echo "-- Couldn't open directory '$dir'. Skipping.\n";
		return;
	}

	# Compare against false explicitly: an entry literally named "0" is falsy
	# and would otherwise end the scan early.
	while( ( $entry = readdir( $mydir ) ) !== false ) {
		if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
			echo importPage( $prefix . $m[1] );
		} elseif( is_dir( "$dir/$entry" ) ) {
			if( $entry != '.' && $entry != '..' ) {
				importPageDirectory( "$dir/$entry", "$entry/" );
			}
		} else {
			echo "-- File '$entry' doesn't seem to contain an article. Skipping.\n";
		}
	}

	closedir( $mydir );
}
74 # ------------------------------------------------------------------------------
/* fetch_ functions
	Grab a given item from the database
	*/

/**
 * Fetch a UseMod user record. Not implemented: always terminates the script.
 *
 * The statements that used to follow the die() call could never run (and
 * referenced an undefined $title), so they have been dropped.
 *
 * @param mixed $uid UseMod user id (currently unused)
 */
function fetchUser( $uid )
{
	die ("fetchUser not implemented" );
}
/**
 * Build the relative file path (without extension) for a page title.
 *
 * Titles whose first byte is an uppercase ASCII letter go under that
 * letter; everything else goes under "other".
 *
 * @param string $title
 * @return string "<bucket>/<title>"
 */
function useModFilename( $title ) {
	$initial = substr( $title, 0, 1 );
	$bucket = preg_match( '/[A-Z]/', $initial ) ? $initial : 'other';
	return $bucket . '/' . $title;
}
/**
 * Load the current revision of a page from its ".db" file.
 *
 * The file is decoded in three nested levels with splitHash(): the page
 * ($FS1), its "text_default" section ($FS2) and that section's "data" ($FS3).
 * Terminates the script if the file does not exist.
 *
 * @param string $title
 * @return object Object with properties text, summary, minor, ts, username, host
 */
function fetchPage( $title )
{
	global $FS,$FS1,$FS2,$FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";
	if( file_exists( $fname ) === false ) {
		die( "Couldn't open file '$fname' for page '$title'.\n" );
	}

	$page = splitHash( $FS1, file_get_contents( $fname ) );
	$section = splitHash( $FS2, $page["text_default"] );
	$text = splitHash( $FS3, $section["data"] );

	$fields = array();
	$fields["text"] = $text["text"];
	$fields["summary"] = $text["summary"];
	$fields["minor"] = $text["minor"];
	$fields["ts"] = $section["ts"];
	$fields["username"] = $section["username"];
	$fields["host"] = $section["host"];

	return array2object( $fields );
}
/**
 * Load the kept (old) revisions of a page from its ".kp" file.
 *
 * @param string $title
 * @return array List of revision objects (text, summary, minor, ts, username,
 *               host); empty if the page has no ".kp" file
 */
function fetchKeptPages( $title )
{
	global $FS,$FS1,$FS2,$FS3, $wgRootDirectory, $wgTimezoneCorrection;

	$fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";
	if( !file_exists( $fname ) ) {
		return array();
	}

	$chunks = explode( $FS1, file_get_contents( $fname ) );
	array_shift( $chunks ); # Drop the junk at beginning of file

	$revisions = array();
	foreach( $chunks as $chunk ) {
		$section = splitHash( $FS2, $chunk );
		$text = splitHash( $FS3, $section["data"] );

		# A usable revision has text, a non-empty minor flag and a positive timestamp
		$usable = $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 );
		if ( !$usable ) {
			echo "-- skipped a bad old revision\n";
			continue;
		}

		$revisions[] = array2object( array(
			"text" => $text["text"],
			"summary" => $text["summary"],
			"minor" => $text["minor"],
			"ts" => $section["ts"],
			"username" => $section["username"],
			"host" => $section["host"] ) );
	}
	return $revisions;
}
/**
 * Decode a flat "key SEP value SEP key SEP value ..." string into an array.
 *
 * Pieces are paired up in order; a trailing piece without a partner is
 * dropped.
 *
 * @param string $sep Separator between pieces
 * @param string $str Encoded string
 * @return array Map of key => value
 */
function splitHash ( $sep , $str ) {
	$pieces = explode( $sep, $str );
	$total = count( $pieces );
	$map = array();
	for ( $k = 0; $k + 1 < $total; $k += 2 ) {
		$map[$pieces[$k]] = $pieces[$k + 1];
	}
	return $map;
}
/* import_ functions
	Take a fetched item and produce SQL
	*/

/* importUser
	$uid is the UseMod user id number.
	The new ones will be assigned arbitrarily and are for internal use only.

	THIS IS DELAYED SINCE PUBLIC DUMPS DONT INCLUDE USER DIR
	*/
/**
 * Produce SQL for one user. Not yet implemented: always terminates the script.
 *
 * The unreachable draft that used to follow the die() call has been removed;
 * it depended on fetchUser() (itself unimplemented) and on a helper,
 * md5hash(), that this file does not define.
 *
 * @param mixed $uid UseMod user id (currently unused)
 */
function importUser( $uid )
{
	die("importUser NYI");
}
/**
 * Resolve the user id and SQL-escaped user text for an edit.
 *
 * @param string $name Account name; when empty the edit is attributed to $host
 * @param string $host Host recorded for the edit
 * @return array array( user id, escaped user text ); the id is 0 unless
 *               $name is a key of the global $usercache
 */
function checkUserCache( $name, $host )
{
	global $usercache;

	if( $name ) {
		# The cache is read by name, so test for the key; in_array() would
		# search the cached ids instead.
		if( isset( $usercache[$name] ) ) {
			$userid = $usercache[$name];
		} else {
			# If we haven't imported user accounts
			$userid = 0;
		}
		$username = wfStrencode( $name );
	} else {
		$userid = 0;
		$username = wfStrencode( $host );
	}
	return array( $userid, $username );
}
/**
 * Build the SQL that imports one page: an INSERT into `cur` for the current
 * revision and, if kept revisions exist, one multi-row INSERT into `old`.
 *
 * Also echoes a "-- Importing page" progress line.
 *
 * @param string $title UseMod page title
 * @return string SQL text
 */
function importPage( $title )
{
	global $usercache;
	global $conversiontime;

	echo "\n-- Importing page $title\n";
	$page = fetchPage( $title );

	$newtitle = wfStrencode( recodeText( $title ) );
	$namespace = 0;

	# Current revision:
	$text = wfStrencode( recodeText( $page->text ) );
	$comment = wfStrencode( recodeText( $page->summary ) );
	$minor = ($page->minor ? 1 : 0);
	list( $userid, $username ) = checkUserCache( $page->username, $page->host );
	# NOTE(review): checkUserCache() already returns an escaped name, so it is
	# escaped a second time here -- confirm whether that is intended.
	$username = wfStrencode( recodeText( $username ) );
	$timestamp = wfUnix2Timestamp( $page->ts );
	$redirect = ( preg_match( '/^#REDIRECT/', $page->text ) ? 1 : 0 );
	$random = mt_rand() / mt_getrandmax();
	$inverse = wfInvertTimestamp( $timestamp );
	$sql = "
INSERT
INTO cur (cur_namespace,cur_title,cur_text,cur_comment,cur_user,cur_user_text,cur_timestamp,inverse_timestamp,cur_touched,cur_minor_edit,cur_is_redirect,cur_random) VALUES
($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse','$conversiontime',$minor,$redirect,$random);\n";

	# History
	$revisions = fetchKeptPages( $title );
	if( count( $revisions ) == 0 ) {
		return $sql;
	}

	# One value tuple per kept revision, joined with commas below
	$rows = array();
	foreach( $revisions as $rev ) {
		$text = wfStrencode( recodeText( $rev->text ) );
		$minor = ($rev->minor ? 1 : 0);
		list( $userid, $username ) = checkUserCache( $rev->username, $rev->host );
		$username = wfStrencode( recodeText( $username ) );
		$timestamp = wfUnix2Timestamp( $rev->ts );
		$inverse = wfInvertTimestamp( $timestamp );
		$comment = wfStrencode( recodeText( $rev->summary ) );

		$rows[] = "\n\t($namespace,'$newtitle','$text','$comment',$userid,'$username','$timestamp','$inverse',$minor)";
	}

	$sql .= "INSERT
INTO old (old_namespace,old_title,old_text,old_comment,old_user,old_user_text,old_timestamp,inverse_timestamp,old_minor_edit) VALUES\n";
	$sql .= implode( ",", $rows );
	$sql .= ";\n\n";
	return $sql;
}
# Whee!
/**
 * Normalise imported text: CRLF becomes LF, the text is converted from
 * $wgImportEncoding to UTF-8, and numeric character references are expanded
 * by wfMungeToUtf8().
 *
 * @param string $string
 * @return string
 */
function recodeText( $string ) {
	global $wgImportEncoding;

	$unixNewlines = str_replace( "\r\n", "\n", $string );
	# For currently latin-1 wikis
	# NOTE(review): iconv() can return false on unconvertible input; the
	# result is passed on unchecked -- confirm that this is acceptable.
	$utf8 = iconv( $wgImportEncoding, "UTF-8", $unixNewlines );
	return wfMungeToUtf8( $utf8 ); # Any old &#1234; stuff
}
/**
 * Encode a Unicode code point as a UTF-8 byte sequence.
 *
 * @param int $codepoint
 * @return string The UTF-8 bytes, or the numeric entity "&#N;" when the value
 *                cannot be encoded (surrogates U+D800..U+DFFF and anything
 *                above U+10FFFF)
 */
function wfUtf8Sequence($codepoint) {
	if($codepoint < 0x80) return chr($codepoint);
	if($codepoint < 0x800) return chr($codepoint >> 6 & 0x3f | 0xc0) .
	                              chr($codepoint & 0x3f | 0x80);
	# Surrogate halves are not characters; encoding them gives invalid UTF-8
	if($codepoint >= 0xd800 && $codepoint <= 0xdfff) return "&#$codepoint;";
	if($codepoint < 0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
	                                chr($codepoint >> 6 & 0x3f | 0x80) .
	                                chr($codepoint & 0x3f | 0x80);
	# Four-byte form covers everything up to the last code point, U+10FFFF
	if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
	                                 chr($codepoint >> 12 & 0x3f | 0x80) .
	                                 chr($codepoint >> 6 & 0x3f | 0x80) .
	                                 chr($codepoint & 0x3f | 0x80);
	# Not a Unicode code point; leave it as an entity
	return "&#$codepoint;";
}
/**
 * Replace decimal (&#1234;) and hexadecimal (&#x4d2;) character references
 * in a string with the output of wfUtf8Sequence().
 *
 * Uses callbacks instead of the /e modifier, so the matched digits are never
 * evaluated as PHP source (where a leading zero would mean octal).
 *
 * @param string $string
 * @return string
 */
function wfMungeToUtf8($string) {
	$string = preg_replace_callback( '/&#([0-9]+);/', 'wfMungeDecimalEntity', $string );
	$string = preg_replace_callback( '/&#x([0-9a-f]+);/i', 'wfMungeHexEntity', $string );
	# Should also do named entities here
	return $string;
}

/** Callback for wfMungeToUtf8(): convert one decimal character reference. */
function wfMungeDecimalEntity( $m ) {
	$digits = ltrim( $m[1], '0' );
	# More than 7 digits is far beyond U+10FFFF; keep the text untouched
	if( strlen( $digits ) > 7 ) return $m[0];
	return wfUtf8Sequence( (int)$digits );
}

/** Callback for wfMungeToUtf8(): convert one hexadecimal character reference. */
function wfMungeHexEntity( $m ) {
	$digits = ltrim( $m[1], '0' );
	# More than 6 hex digits is beyond U+10FFFF; keep the text untouched
	if( strlen( $digits ) > 6 ) return $m[0];
	return wfUtf8Sequence( hexdec( $digits ) );
}
/**
 * Escape a string for use inside a quoted MySQL string literal.
 *
 * Applies the same byte-wise escapes as the legacy mysql_escape_string()
 * (NUL, newline, carriage return, backslash, both quote characters and
 * Ctrl-Z) without needing the mysql extension, which newer PHP versions
 * no longer ship.
 *
 * @param string $string
 * @return string
 */
function wfStrencode( $string ) {
	return strtr( (string)$string, array(
		"\x00" => '\\0',
		"\n"   => '\\n',
		"\r"   => '\\r',
		'\\'   => '\\\\',
		"'"    => "\\'",
		'"'    => '\\"',
		"\x1a" => '\\Z',
	) );
}
/**
 * Format a Unix timestamp as a 14-digit "YmdHis" string in GMT.
 *
 * @param int $unixtime Seconds since the Unix epoch
 * @return string
 */
function wfUnix2Timestamp( $unixtime ) {
	$stamp = gmdate( "YmdHis", $unixtime );
	return $stamp;
}
/**
 * Convert a 14-digit "YmdHis" GMT timestamp string to a Unix timestamp.
 *
 * @param string $ts
 * @return int
 */
function wfTimestamp2Unix( $ts )
{
	$year   = (int)substr( $ts, 0, 4 );
	$month  = (int)substr( $ts, 4, 2 );
	$day    = (int)substr( $ts, 6, 2 );
	$hour   = (int)substr( $ts, 8, 2 );
	$minute = (int)substr( $ts, 10, 2 );
	$second = (int)substr( $ts, 12, 2 );

	return gmmktime( $hour, $minute, $second, $month, $day, $year );
}
/**
 * Current time as a 14-digit "YmdHis" string in GMT.
 *
 * @return string
 */
function wfTimestampNow() {
	$now = time();
	return gmdate( "YmdHis", $now );
}
# Sorting hack for MySQL 3, which doesn't use index sorts for DESC
/**
 * Replace every decimal digit d in a timestamp string with 9 - d, so that
 * ascending order of the result is descending order of the input.
 *
 * @param string $ts
 * @return string
 */
function wfInvertTimestamp( $ts ) {
	$digits = "0123456789";
	$mirrored = strrev( $digits );
	return strtr( $ts, $digits, $mirrored );
}
/**
 * Seed the Mersenne Twister generator from the current microtime and record
 * that fact in the global $wgRandomSeeded.
 */
function wfSeedRandom()
{
	# Without this declaration the flag below would be a throwaway local
	global $wgRandomSeeded;

	# Low 31 bits of the last 8 hex digits of the md5 of microtime()
	$seed = hexdec(substr(md5(microtime()),-8)) & 0x7fffffff;
	mt_srand( $seed );
	$wgRandomSeeded = true;
}
/**
 * Copy the entries of an associative array onto a new plain object.
 *
 * Starts from an empty stdClass; casting a scalar to object would leave an
 * extra "scalar" property on the result.
 *
 * @param array $arr Map of property name => value
 * @return object
 */
function array2object( $arr ) {
	$o = new stdClass();
	foreach( $arr as $x => $y ) {
		$o->$x = $y;
	}
	return $o;
}