3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 * http://www.gnu.org/copyleft/gpl.html
23 * @subpackage SpecialPage
26 require_once( 'WikiError.php' );
/**
 * Special:Import entry point: runs a submitted import, then renders the
 * import forms.
 *
 * On a POST with action=submit, the request's "source" field selects how the
 * importer is set up: "upload" reads the uploaded "xmlimport" file, and
 * "interwiki" fetches "frompage" from the wiki named by "interwiki". A
 * WikiError from setup or from the import itself is shown to the user;
 * otherwise each revision is handed to wfImportOldRevision().
 *
 * The upload form is always rendered. The interwiki form is rendered only
 * when $wgImportSources is non-empty.
 *
 * @param string $page Not used by this function.
 */
function wfSpecialImport( $page = '' ) {
	global $wgOut, $wgRequest, $wgTitle;
	global $wgImportSources;

	if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
		$importer = new WikiImporter();

		switch( $wgRequest->getVal( "source" ) ) {
		case "upload":
			$result = $importer->setupFromUpload( "xmlimport" );
			break;
		case "interwiki":
			# Only accept prefixes the form itself offers. Without this
			# check a hand-crafted POST could import from any interwiki
			# prefix, not just the configured sources.
			$interwiki = $wgRequest->getVal( "interwiki" );
			if( empty( $wgImportSources ) || !in_array( $interwiki, $wgImportSources, true ) ) {
				$result = new WikiError( "Bad interwiki link" );
			} else {
				$result = $importer->setupFromInterwiki(
					$interwiki,
					$wgRequest->getText( "frompage" ) );
			}
			break;
		default:
			$result = new WikiError( "Unknown import source type" );
		}

		if( WikiError::isError( $result ) ) {
			$wgOut->addWikiText( htmlspecialchars( $result->toString() ) );
		} else {
			$importer->setRevisionHandler( "wfImportOldRevision" );
			$result = $importer->doImport();
			if( WikiError::isError( $result ) ) {
				$wgOut->addHTML( "<p>" . wfMsg( "importfailed",
					htmlspecialchars( $result->toString() ) ) . "</p>" );
			} else {
				$wgOut->addHTML( "<p>" . wfMsg( "importsuccess" ) . "</p>" );
			}
		}
	}

	$wgOut->addWikiText( "<p>" . wfMsg( "importtext" ) . "</p>" );
	$action = $wgTitle->escapeLocalUrl( 'action=submit' );
	$wgOut->addHTML( "
<fieldset>
	<legend>Upload XML</legend>
	<form enctype='multipart/form-data' method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='upload' />
		<input type='hidden' name='MAX_FILE_SIZE' value='2000000' />
		<input type='file' name='xmlimport' value='' size='30' />
		<input type='submit' value='" . htmlspecialchars( wfMsg( "uploadbtn" ) ) . "'/>
	</form>
</fieldset>
" );

	if( !empty( $wgImportSources ) ) {
		$wgOut->addHTML( "
<fieldset>
	<legend>Interwiki import</legend>
	<form method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='interwiki' />
		<select name='interwiki'>
" );
		foreach( $wgImportSources as $interwiki ) {
			$iw = htmlspecialchars( $interwiki );
			$wgOut->addHTML( "<option value=\"$iw\">$iw</option>\n" );
		}
		$wgOut->addHTML( "
		</select>
		<input name='frompage' />
		<input type='submit' />
	</form>
</fieldset>
" );
	}
}
/**
 * Revision handler installed by wfSpecialImport(): asks the master database
 * connection to run the revision's importOldRevision() method through
 * deadlockLoop().
 *
 * @param object $revision Revision object exposing importOldRevision()
 */
function wfImportOldRevision( &$revision ) {
	$callback = array( &$revision, 'importOldRevision' );
	$dbw =& wfGetDB( DB_MASTER );
	$dbw->deadlockLoop( $callback );
}
117 * @subpackage SpecialPage
121 var $timestamp = "20010115000000";
/**
 * Stores the page title for this revision.
 *
 * The text is passed through fixEncoding() and then turned into a title
 * object with Title::newFromText().
 *
 * @param string $text Page title as read from the import data
 */
function setTitle( $text ) {
	$converted = $this->fixEncoding( $text );
	$this->title = Title::newFromText( $converted );
}
/**
 * Stores the revision timestamp with the date/time punctuation removed.
 *
 * Input of the form 2003-08-05T18:30:02Z becomes 20030805183002. A value
 * that does not match that pattern is stored unchanged.
 *
 * @param string $ts Timestamp as read from the import data
 */
function setTimestamp( $ts ) {
	$isoPattern = '/^(....)-(..)-(..)T(..):(..):(..)Z$/';
	$digitsOnly = '$1$2$3$4$5$6';
	$this->timestamp = preg_replace( $isoPattern, $digitsOnly, $ts );
}
/**
 * Stores the contributor's user name, after fixEncoding(), in user_text.
 *
 * @param string $user User name as read from the import data
 */
function setUsername( $user ) {
	$name = $this->fixEncoding( $user );
	$this->user_text = $name;
}
/**
 * Stores the contributor's IP address, after fixEncoding(), in user_text.
 * This is the same field that setUsername() writes.
 *
 * @param string $ip IP address as read from the import data
 */
function setUserIP( $ip ) {
	$address = $this->fixEncoding( $ip );
	$this->user_text = $address;
}
/**
 * Stores the revision text after passing it through fixEncoding().
 *
 * @param string $text Revision text as read from the import data
 */
function setText( $text ) {
	$body = $this->fixEncoding( $text );
	$this->text = $body;
}
/**
 * Stores the edit comment after passing it through fixEncoding().
 *
 * @param string $text Edit comment as read from the import data
 */
function setComment( $text ) {
	$summary = $this->fixEncoding( $text );
	$this->comment = $summary;
}
/**
 * Converts imported UTF-8 data to the wiki's input encoding.
 *
 * When $wgInputEncoding is already UTF-8 (compared case-insensitively) the
 * data is returned untouched. Otherwise it is converted from UTF-8 to
 * $wgInputEncoding with $wgContLang->iconv().
 *
 * @param string $data UTF-8 text from the import data
 * @return string Text in the wiki's input encoding
 */
function fixEncoding( $data ) {
	global $wgContLang, $wgInputEncoding;

	if( strcasecmp( $wgInputEncoding, "utf-8" ) == 0 ) {
		# Already in the target encoding; nothing to convert.
		return $data;
	} else {
		return $wgContLang->iconv( "utf-8", $wgInputEncoding, $data );
	}
}
/**
 * Returns the title stored by setTitle().
 *
 * NOTE(review): only the function header was visible here. The body follows
 * the sibling getters, which return the property their setter writes;
 * confirm against the upstream file.
 *
 * @return mixed Whatever Title::newFromText() produced in setTitle()
 */
function getTitle() {
	return $this->title;
}
/**
 * Returns the timestamp as stored by setTimestamp(), or the property's
 * initial value if it was never set.
 *
 * @return string
 */
function getTimestamp() {
	$stamp = $this->timestamp;
	return $stamp;
}
172 return $this->user_text
;
/**
 * Returns the edit comment stored by setComment().
 *
 * @return string
 */
function getComment() {
	$summary = $this->comment;
	return $summary;
}
/**
 * Writes this revision into the database.
 *
 * Visible steps: obtain the master connection with wfGetDB( DB_MASTER ),
 * build a User from getUser() via User::newFromName(), wrap $this->title in
 * an Article and read its page id, call Article::insertOn() to create the
 * page, build a Revision from the text, comment, user id, user name and
 * timestamp, insert it, and finally call Article::updateIfNewerOn().
 *
 * NOTE(review): several lines of this method are not visible in this view
 * (the condition guarding insertOn(), part of the Revision field array and
 * the end of the function). Restore them from the upstream file before
 * changing the logic.
 * NOTE(review): $fname and $revId are assigned but never read in the visible
 * lines.
 * NOTE(review): $user is dereferenced without a check; confirm what
 * User::newFromName() returns for names it rejects.
 */
183 function importOldRevision() {
184 $fname = "WikiImporter::importOldRevision";
185 $dbw =& wfGetDB( DB_MASTER
);
187 # Sneak a single revision into place
188 $user = User
::newFromName( $this->getUser() );
190 $article = new Article( $this->title
);
191 $pageId = $article->getId();
193 # must create the page...
194 $pageId = $article->insertOn( $dbw );
197 # FIXME: Check for exact conflicts
198 # FIXME: Use original rev_id optionally
199 # FIXME: blah blah blah
201 #if( $numrows > 0 ) {
202 # return wfMsg( "importhistoryconflict" );
206 $revision = new Revision( array(
208 'text' => $this->getText(),
209 'comment' => $this->getComment(),
210 'user' => IntVal( $user->getId() ),
211 'user_text' => $user->getName(),
212 'timestamp' => $this->timestamp
,
215 $revId = $revision->insertOn( $dbw );
216 $article->updateIfNewerOn( $dbw, $revision );
226 * @subpackage SpecialPage
230 var $mRevisionHandler = NULL;
/**
 * Constructor: installs defaultRevisionHandler() as the revision callback.
 *
 * Declared as __construct() because a method named after the class is no
 * longer treated as a constructor in PHP 8. The callback holds $this
 * directly; objects are handles in PHP 5+, so no reference is needed.
 */
function __construct() {
	$this->setRevisionHandler( array( $this, "defaultRevisionHandler" ) );
}

/**
 * PHP 4-style constructor name, kept so code that calls it explicitly keeps
 * working. It simply delegates to __construct().
 */
function WikiImporter() {
	$this->__construct();
}
/**
 * Reports an XML structure problem by sending it to debug().
 *
 * Nothing is returned and parsing is not stopped here, so handlers that
 * `return` this call's result return null.
 *
 * @param string $err Description of the problem
 */
function throwXmlError( $err ) {
	$message = "FAILURE: " . $err;
	$this->debug( $message );
}
/**
 * Loads the import data from a file into mSource.
 *
 * Read warnings are suppressed; a failed read is reported through the
 * returned WikiError instead.
 *
 * @param string $filename Path handed to file_get_contents()
 * @return WikiError|null WikiError when the file could not be read
 */
function setupFromFile( $filename ) {
	$contents = @file_get_contents( $filename );
	$this->mSource = $contents;
	if( false === $contents ) {
		return new WikiError( "Couldn't open import file" );
	}
}
/**
 * Loads the import data from an uploaded file.
 *
 * @param string $fieldname Name of the file field in $_FILES
 * @return mixed WikiErrorMsg 'importnofile' when nothing usable was uploaded,
 *               WikiErrorMsg 'importuploaderror' when PHP reported an upload
 *               error, otherwise the result of setupFromFile()
 */
function setupFromUpload( $fieldname = "xmlimport" ) {
	# Test the superglobal directly. Binding a reference to a missing key
	# would create a null $_FILES entry as a side effect.
	if( !isset( $_FILES[$fieldname] ) ) {
		return new WikiErrorMsg( 'importnofile' );
	}
	$upload = $_FILES[$fieldname];

	if( !empty( $upload['error'] ) ) {
		return new WikiErrorMsg( 'importuploaderror', $upload['error'] );
	}

	# A malformed entry without tmp_name is treated as "no file".
	$fname = isset( $upload['tmp_name'] ) ? $upload['tmp_name'] : '';
	if( is_uploaded_file( $fname ) ) {
		return $this->setupFromFile( $fname );
	}
	return new WikiErrorMsg( 'importnofile' );
}
/**
 * Loads the import data from a URL by way of setupFromFile().
 *
 * @param string $url Address of the export to fetch
 * @return mixed Result of setupFromFile() (a WikiError on failure)
 */
function setupFromURL( $url ) {
	# fopen-wrappers are normally turned off for security.
	# ini_set() returns the old value on success and false on failure, so the
	# previous setting can be put back instead of being forced off.
	$previous = ini_set( "allow_url_fopen", "1" );
	$ret = $this->setupFromFile( $url );
	if( $previous !== false ) {
		ini_set( "allow_url_fopen", $previous );
	}
	# Pass the outcome on; setupFromInterwiki() returns it to its caller.
	return $ret;
}
/**
 * Loads the import data from another wiki's Special:Export page.
 *
 * The interwiki prefix is resolved with Title::getInterwikiLink(). The
 * "$1" placeholder in the resulting URL is replaced by the URL-encoded
 * "Special:Export/<page>", and the result is fetched with setupFromURL().
 *
 * @param string $interwiki Interwiki prefix of the source wiki
 * @param string $page      Title of the page to import
 * @return mixed WikiError for an unknown prefix, else setupFromURL()'s result
 */
function setupFromInterwiki( $interwiki, $page ) {
	$template = Title::getInterwikiLink( $interwiki );
	if( !empty( $template ) ) {
		$exportPath = wfUrlencode( "Special:Export/$page" );
		$url = str_replace( "$1", $exportPath, $template );
		$this->notice( "Importing from $url" );
		return $this->setupFromURL( $url );
	}
	return new WikiError( 'Bad interwiki link' );
}
/**
 * Parses the XML held in mSource, driving the in_* and out_* handlers.
 *
 * @return mixed WikiErrorMsg 'importnotext' when no source was loaded,
 *               WikiXmlError when the XML parser reports a failure
 */
function doImport() {
	if( empty( $this->mSource ) ) {
		return new WikiErrorMsg( "importnotext" );
	}

	$parser = xml_parser_create( "UTF-8" );

	# case folding violates XML standard, turn it off
	xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

	# Pass $this directly: call-time pass-by-reference (&$this) is a fatal
	# error since PHP 5.4, and objects are handles in PHP 5+ anyway.
	xml_set_object( $parser, $this );
	xml_set_element_handler( $parser, "in_start", "" );

	if( !xml_parse( $parser, $this->mSource, true ) ) {
		# NOTE(review): the parser is not freed on this path because it is
		# handed to WikiXmlError; confirm whether that class keeps using it.
		return new WikiXmlError( $parser );
	}
	xml_parser_free( $parser );
}
/**
 * Trace hook used throughout the importer. It is currently a no-op: the
 * call that would forward the text to notice() is commented out.
 *
 * @param string $data Trace text
 */
function debug( $data ) {
	#$this->notice( "DEBUG: $data\n" );
}
/**
 * Reports a progress message.
 *
 * Visible behaviour: checks $wgCommandLineMode, and appends the message to
 * $wgOut as an HTML list item.
 *
 * NOTE(review): the command-line branch and the declaration that brings
 * $wgOut into scope are not visible in this view; restore them from the
 * upstream file.
 * NOTE(review): $data is placed into the HTML without escaping, so callers
 * must pass HTML-safe text.
 */
315 function notice( $data ) {
316 global $wgCommandLineMode;
317 if( $wgCommandLineMode ) {
321 $wgOut->addHTML( "<li>$data</li>\n" );
/**
 * Sets the callback that out_revision() invokes for each completed revision.
 *
 * @param mixed $functionref Function name or array( object, method ) pair
 */
function setRevisionHandler( $functionref ) {
	$handler = $functionref;
	$this->mRevisionHandler = $handler;
}
/**
 * Default revision callback: sends the revision's fields to debug() and
 * writes nothing anywhere else.
 *
 * @param object $revision Revision with title, user_text, timestamp,
 *                         comment and text properties
 */
function defaultRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	# A title that is not an object is reported as invalid.
	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: "<invalid>";
	$this->debug( "-- Title: " . $titleText );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
344 # XML parser callbacks from here out -- beware!
/**
 * Parser callback that ignores whatever it is given. It is installed
 * wherever element or character events should be discarded.
 *
 * @param resource $parser XML parser
 * @param mixed    $x      First callback argument (ignored)
 * @param mixed    $y      Second callback argument, if any (ignored)
 */
function donothing( $parser, $x, $y="" ) {
	#$this->debug( "donothing" );
}
/**
 * Start-element handler for the document root. It expects <mediawiki> and
 * then switches the parser to the in_mediawiki/out_mediawiki handlers.
 *
 * @param resource $parser  XML parser
 * @param string   $name    Element name
 * @param array    $attribs Element attributes (unused)
 */
function in_start( $parser, $name, $attribs ) {
	$this->debug( "in_start $name" );
	if( $name == "mediawiki" ) {
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
		return;
	}
	return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
}
/**
 * Start-element handler inside <mediawiki>. It expects <page> and then
 * switches the parser to the in_page/out_page handlers.
 *
 * @param resource $parser  XML parser
 * @param string   $name    Element name
 * @param array    $attribs Element attributes (unused)
 */
function in_mediawiki( $parser, $name, $attribs ) {
	$this->debug( "in_mediawiki $name" );
	if( $name == "page" ) {
		xml_set_element_handler( $parser, "in_page", "out_page" );
		return;
	}
	return $this->throwXMLerror( "Expected <page>, got <$name>" );
}
/**
 * End-element handler for </mediawiki>. Once the root element closes, all
 * further element events are routed to donothing().
 *
 * @param resource $parser XML parser
 * @param string   $name   Element name
 */
function out_mediawiki( $parser, $name ) {
	$this->debug( "out_mediawiki $name" );
	if( $name == "mediawiki" ) {
		xml_set_element_handler( $parser, "donothing", "donothing" );
		return;
	}
	return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
}
/**
 * Start-element handler inside <page>.
 *
 * Visible branches: (1) prepare to collect the element's character data by
 * recording the element name in appendfield, clearing appenddata, setting
 * parenttag to "page" and installing in_nothing/out_append/char_append;
 * (2) start a new WikiRevision carrying the current workTitle and switch to
 * the in_revision/out_revision handlers; (3) report any other element
 * through throwXMLerror().
 *
 * NOTE(review): the switch/case labels that choose between these branches
 * are not visible in this view; restore them from the upstream file before
 * changing the logic.
 */
372 function in_page( $parser, $name, $attribs ) {
373 $this->debug( "in_page $name" );
378 $this->appendfield
= $name;
379 $this->appenddata
= "";
380 $this->parenttag
= "page";
381 xml_set_element_handler( $parser, "in_nothing", "out_append" );
382 xml_set_character_data_handler( $parser, "char_append" );
385 $this->workRevision
= new WikiRevision
;
386 $this->workRevision
->setTitle( $this->workTitle
);
387 xml_set_element_handler( $parser, "in_revision", "out_revision" );
390 return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
/**
 * End-element handler for </page>. It returns the parser to the
 * in_mediawiki/out_mediawiki handlers and clears the per-page working
 * title and revision.
 *
 * @param resource $parser XML parser
 * @param string   $name   Element name
 */
function out_page( $parser, $name ) {
	$this->debug( "out_page $name" );
	if( $name == "page" ) {
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		$this->workTitle = NULL;
		$this->workRevision = NULL;
		return;
	}
	return $this->throwXMLerror( "Expected </page>, got </$name>" );
}
/**
 * Start-element handler used while collecting character data. Any child
 * element is reported through throwXMLerror().
 *
 * @param resource $parser  XML parser
 * @param string   $name    Element name
 * @param array    $attribs Element attributes (unused)
 */
function in_nothing( $parser, $name, $attribs ) {
	$this->debug( "in_nothing $name" );
	$complaint = "No child elements allowed here; got <$name>";
	return $this->throwXMLerror( $complaint );
}
/**
 * Character-data handler: appends each chunk delivered by the parser to
 * appenddata.
 *
 * @param resource $parser XML parser
 * @param string   $data   Chunk of character data
 */
function char_append( $parser, $data ) {
	$this->debug( "char_append '" . $data . "'" );
	$this->appenddata .= $data;
}
/**
 * End-element handler for an element whose character data was collected.
 *
 * It checks that the closing tag matches appendfield, restores the parent's
 * handlers (in_<parenttag>/out_<parenttag>), stops character collection by
 * installing donothing, then stores appenddata according to appendfield:
 * into workTitle, or into the working revision through setText(),
 * setUsername(), setUserIP(), setTimestamp() or setComment(). An
 * unrecognised field is only reported to debug(). Finally appendfield and
 * appenddata are cleared.
 *
 * NOTE(review): the case labels mapping field names to these assignments
 * are not visible in this view; restore them from the upstream file before
 * changing the logic.
 */
413 function out_append( $parser, $name ) {
414 $this->debug( "out_append $name" );
415 if( $name != $this->appendfield
) {
416 return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
418 xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
419 xml_set_character_data_handler( $parser, "donothing" );
420 switch( $this->appendfield
) {
422 $this->workTitle
= $this->appenddata
;
425 $this->workRevision
->setText( $this->appenddata
);
428 $this->workRevision
->setUsername( $this->appenddata
);
431 $this->workRevision
->setUserIP( $this->appenddata
);
434 $this->workRevision
->setTimestamp( $this->appenddata
);
437 $this->workRevision
->setComment( $this->appenddata
);
440 $this->debug( "Bad append: {$this->appendfield}" );
442 $this->appendfield
= "";
443 $this->appenddata
= "";
/**
 * Start-element handler inside <revision>.
 *
 * Visible branches: (1) collect the element's character data, with
 * parenttag set to "revision", appendfield set to the element name and the
 * in_nothing/out_append/char_append handlers installed; (2) switch to the
 * in_contributor/out_contributor handlers; (3) report any other element
 * through throwXMLerror().
 *
 * NOTE(review): the switch/case labels that choose between these branches
 * are not visible in this view; restore them from the upstream file before
 * changing the logic.
 */
446 function in_revision( $parser, $name, $attribs ) {
447 $this->debug( "in_revision $name" );
453 $this->parenttag
= "revision";
454 $this->appendfield
= $name;
455 xml_set_element_handler( $parser, "in_nothing", "out_append" );
456 xml_set_character_data_handler( $parser, "char_append" );
459 xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
462 return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
/**
 * End-element handler for </revision>. It returns the parser to the
 * in_page/out_page handlers and passes the finished revision to the
 * registered revision handler. Any non-empty handler result is shown as an
 * HTML list item.
 *
 * @param resource $parser XML parser
 * @param string   $name   Element name
 */
function out_revision( $parser, $name ) {
	global $wgOut;

	$this->debug( "out_revision $name" );
	if( $name != "revision" ) {
		return $this->throwXMLerror( "Expected </revision>, got </$name>" );
	}
	xml_set_element_handler( $parser, "in_page", "out_page" );

	# The handlers declare their first parameter by reference. Call-time
	# pass-by-reference is a fatal error since PHP 5.4, so the reference is
	# carried in the argument array instead.
	$args = array( &$this->workRevision, $this );
	$out = call_user_func_array( $this->mRevisionHandler, $args );
	if( !empty( $out ) ) {
		# NOTE(review): $out is emitted unescaped; handlers must return
		# HTML-safe text.
		$wgOut->addHTML( "<li>" . $out . "</li>\n" );
	}
}
/**
 * Start-element handler inside <contributor>.
 *
 * Visible branches: (1) collect the element's character data, with
 * parenttag set to "contributor", appendfield set to the element name and
 * the in_nothing/out_append/char_append handlers installed; (2) report any
 * other element through throwXMLerror(). Unlike the sibling handlers, the
 * result of throwXMLerror() is not returned here.
 *
 * NOTE(review): the switch/case labels that choose between these branches
 * are not visible in this view; restore them from the upstream file before
 * changing the logic.
 */
480 function in_contributor( $parser, $name, $attribs ) {
481 $this->debug( "in_contributor $name" );
485 $this->parenttag
= "contributor";
486 $this->appendfield
= $name;
487 xml_set_element_handler( $parser, "in_nothing", "out_append" );
488 xml_set_character_data_handler( $parser, "char_append" );
491 $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
/**
 * End-element handler for </contributor>. It returns the parser to the
 * in_revision/out_revision handlers.
 *
 * @param resource $parser XML parser
 * @param string   $name   Element name
 */
function out_contributor( $parser, $name ) {
	$this->debug( "out_contributor $name" );
	if( $name == "contributor" ) {
		xml_set_element_handler( $parser, "in_revision", "out_revision" );
		return;
	}
	return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
}