3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 * http://www.gnu.org/copyleft/gpl.html
23 * @subpackage SpecialPage
26 require_once( 'WikiError.php' );
/**
 * Entry point for the import special page.
 *
 * On a POST with action=submit it builds a WikiImporter, prepares its source
 * from either the uploaded file field "xmlimport" or from the "interwiki" /
 * "frompage" request values, and then runs the import with
 * wfImportOldRevision as the per-revision handler. Setup and import errors
 * are printed with their message passed through htmlspecialchars().
 * The upload form is emitted afterwards; the interwiki form only when
 * $wgImportSources is not empty.
 *
 * NOTE(review): the case labels of the switch, the else branches and parts of
 * the HTML strings are not visible in this excerpt. The hidden "source"
 * inputs of the two forms submit 'upload' and 'interwiki', which presumably
 * match the missing labels -- confirm against the full file.
 *
 * @param string $page Not read anywhere in the visible code.
 */
31 function wfSpecialImport( $page = '' ) {
32 global $wgOut, $wgLang, $wgRequest, $wgTitle;
33 global $wgImportSources;
36 # $wgOut->addWikiText( "Special:Import is not ready for this beta release, sorry." );
# Only act on a submitted form; a plain GET falls through to the forms below.
40 if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
41 $importer = new WikiImporter();
43 switch( $wgRequest->getVal( "source" ) ) {
45 $result = $importer->setupFromUpload( "xmlimport" );
48 $result = $importer->setupFromInterwiki(
49 $wgRequest->getVal( "interwiki" ),
50 $wgRequest->getText( "frompage" ) );
53 $result = new WikiError( "Unknown import source type" );
56 if( WikiError
::isError( $result ) ) {
57 $wgOut->addWikiText( htmlspecialchars( $result->toString() ) );
# Source is ready: every parsed revision is handed to wfImportOldRevision().
59 $importer->setRevisionHandler( "wfImportOldRevision" );
60 $result = $importer->doImport();
61 if( WikiError
::isError( $result ) ) {
62 $wgOut->addHTML( "<p>" . wfMsg( "importfailed",
63 htmlspecialchars( $result->toString() ) ) . "</p>" );
66 $wgOut->addHTML( "<p>" . wfMsg( "importsuccess" ) . "</p>" );
# Intro text and forms; both forms post back to this page with action=submit.
71 $wgOut->addWikiText( "<p>" . wfMsg( "importtext" ) . "</p>" );
72 $action = $wgTitle->escapeLocalUrl( 'action=submit' );
75 <legend>Upload XML</legend>
76 <form enctype='multipart/form-data' method='post' action=\"$action\">
77 <input type='hidden' name='action' value='submit' />
78 <input type='hidden' name='source' value='upload' />
79 <input type='hidden' name='MAX_FILE_SIZE' value='2000000' />
80 <input type='file' name='xmlimport' value='' size='30' />
81 <input type='submit' value='" . htmlspecialchars( wfMsg( "uploadbtn" ) ) . "'/>
86 if( !empty( $wgImportSources ) ) {
89 <legend>Interwiki import</legend>
90 <form method='post' action=\"$action\">
91 <input type='hidden' name='action' value='submit' />
92 <input type='hidden' name='source' value='interwiki' />
93 <select name='interwiki'>
95 foreach( $wgImportSources as $interwiki ) {
96 $iw = htmlspecialchars( $interwiki );
97 $wgOut->addHTML( "<option value=\"$iw\">$iw</option>\n" );
101 <input name='frompage' />
102 <input type='submit' />
/**
 * Revision handler installed by wfSpecialImport().
 *
 * Runs the revision's own importOldRevision() method through the master
 * database connection's deadlockLoop().
 *
 * @param object $revision Revision object exposing importOldRevision().
 */
function wfImportOldRevision( &$revision ) {
	$callback = array( &$revision, 'importOldRevision' );
	$dbw =& wfGetDB( DB_MASTER );
	$dbw->deadlockLoop( $callback );
}
117 * @subpackage SpecialPage
# Revision timestamp as a 14-digit string; this default stays in place until setTimestamp() assigns another value.
121 var $timestamp = "20010115000000";
/**
 * Resolve a title string and remember it as this revision's page.
 *
 * @param string $text Title text handed to Title::newFromText(); whatever
 *                     that call returns is stored in $this->title.
 */
function setTitle( $text ) {
	$resolved = Title::newFromText( $text );
	$this->title = $resolved;
}
/**
 * Store the revision timestamp.
 *
 * @param string $ts Timestamp in the form 2003-08-05T18:30:02Z. The six
 *                   digit groups are concatenated; a string that does not
 *                   match the pattern is stored as given, because
 *                   preg_replace() returns its subject when nothing matches.
 */
function setTimestamp( $ts ) {
	# 2003-08-05T18:30:02Z
	$pattern = '/^(....)-(..)-(..)T(..):(..):(..)Z$/';
	$digitsOnly = '$1$2$3$4$5$6';
	$this->timestamp = preg_replace( $pattern, $digitsOnly, $ts );
}
136 function setUsername( $user ) {
137 $this->user_text
= $user;
/**
 * Record the contributor by IP address.
 *
 * @param string $ip Stored verbatim in $this->user_text, the field that
 *                   getUser-style readers and importOldRevision() consult.
 */
function setUserIP( $ip ) {
	$this->user_text = $ip;
}
/**
 * Setter for the revision text; out_append() calls it with the collected
 * character data of a field.
 * NOTE(review): the method body is not visible in this excerpt; presumably it
 * stores $text on the object -- confirm against the full file.
 */
144 function setText( $text ) {
/**
 * Store the edit summary of the revision.
 *
 * @param string $text Saved verbatim in $this->comment.
 */
function setComment( $text ) {
	$this->comment = $text;
}
/**
 * Accessor paired with setTitle().
 * NOTE(review): the method body is not visible in this excerpt; presumably it
 * returns $this->title -- confirm against the full file.
 */
152 function getTitle() {
/**
 * @return string The value currently held in $this->timestamp.
 */
function getTimestamp() {
	return $this->timestamp;
}
161 return $this->user_text
;
/**
 * @return string The edit summary last given to setComment().
 */
function getComment() {
	return $this->comment;
}
/**
 * Write this revision into the database.
 *
 * Looks up the contributor with User::newFromName(), builds an Article for
 * $this->title, inserts a page row through Article::insertOn(), then inserts
 * a new Revision and calls Article::updateIfNewerOn() with it.
 *
 * NOTE(review): several lines are missing from this excerpt -- the condition
 * guarding the page insert, some entries of the Revision array, and whatever
 * follows the last call. Do not rely on this listing for the exact row
 * contents or the return value.
 * NOTE(review): $user is dereferenced without a check; confirm that
 * User::newFromName() always yields an object for names found in dumps.
 */
172 function importOldRevision() {
# Label names WikiImporter although the method is invoked on WikiRevision objects; unused in the visible code.
173 $fname = "WikiImporter::importOldRevision";
174 $dbw =& wfGetDB( DB_MASTER
);
176 # Sneak a single revision into place
177 $user = User
::newFromName( $this->getUser() );
179 $article = new Article( $this->title
);
180 $pageId = $article->getId();
182 # must create the page...
183 $pageId = $article->insertOn( $dbw );
186 # FIXME: Check for exact conflicts
187 # FIXME: Use original rev_id optionally
188 # FIXME: blah blah blah
190 #if( $numrows > 0 ) {
191 # return wfMsg( "importhistoryconflict" );
195 $revision = new Revision( array(
197 'text' => $this->getText(),
198 'comment' => $this->getComment(),
199 'user' => IntVal( $user->getId() ),
200 'user_text' => $user->getName(),
201 'timestamp' => $this->timestamp
,
# $revId is not read again in the visible code.
204 $revId = $revision->insertOn( $dbw );
205 $article->updateIfNewerOn( $dbw, $revision );
215 * @subpackage SpecialPage
# Callback run for each finished <revision>; assigned by setRevisionHandler() and invoked from out_revision().
219 var $mRevisionHandler = NULL;
/**
 * Constructor: installs defaultRevisionHandler() as the revision callback
 * until a caller replaces it through setRevisionHandler().
 */
function WikiImporter() {
	$fallback = array( &$this, "defaultRevisionHandler" );
	$this->setRevisionHandler( $fallback );
}
/**
 * Report a structural problem found while parsing.
 *
 * The visible code only forwards the message to debug(); it neither stops
 * the parser nor returns a value. The XML callbacks invoke it as
 * throwXMLerror(), which resolves here because PHP method names are
 * case-insensitive.
 *
 * @param string $err Description of the problem.
 */
function throwXmlError( $err ) {
	$message = "FAILURE: $err";
	$this->debug( $message );
}
/**
 * Read the whole import source into $this->mSource.
 *
 * @param string $filename Anything file_get_contents() accepts; setupFromURL()
 *                         passes a URL through this same method.
 * @return WikiError on read failure. NOTE(review): the success path is not
 *         visible in this excerpt -- confirm what is returned there.
 */
230 function setupFromFile( $filename ) {
# Warnings are suppressed with @; failure is detected by the strict false check below.
231 $this->mSource
= @file_get_contents
( $filename );
232 if( $this->mSource
=== false ) {
233 return new WikiError( "Couldn't open import file" );
/**
 * Load the import source from an HTTP file upload.
 *
 * @param string $fieldname Name of the file input in the submitted form.
 * @return mixed Result of setupFromFile() for a genuine upload; otherwise a
 *               WikiErrorMsg keyed 'importnofile' (nothing usable was sent)
 *               or 'importuploaderror' (PHP reported an upload error code).
 */
function setupFromUpload( $fieldname = "xmlimport" ) {
	# Test the superglobal element directly. Binding a reference to a key
	# that does not exist would create a NULL entry in $_FILES as a side
	# effect of merely looking.
	if( !isset( $_FILES[$fieldname] ) ) {
		return new WikiErrorMsg( 'importnofile' );
	}
	$upload = $_FILES[$fieldname];

	if( !empty( $upload['error'] ) ) {
		return new WikiErrorMsg( 'importuploaderror', $upload['error'] );
	}

	$fname = $upload['tmp_name'];
	# Only read paths that PHP itself registered as uploads for this request.
	if( is_uploaded_file( $fname ) ) {
		return $this->setupFromFile( $fname );
	}
	return new WikiErrorMsg( 'importnofile' );
}
/**
 * Load the import source from a URL by way of setupFromFile().
 *
 * allow_url_fopen is switched on for the duration of the read and then put
 * back to the value it had before, rather than being forced off regardless
 * of how the site was configured.
 *
 * @param string $url Address of the export to fetch.
 * @return mixed Whatever setupFromFile() returns for $url.
 */
function setupFromURL( $url ) {
	# fopen-wrappers are normally turned off for security.
	# ini_set() hands back the previous value, or false when the setting
	# could not be changed -- in which case there is nothing to restore.
	$previous = ini_set( "allow_url_fopen", true );
	$ret = $this->setupFromFile( $url );
	if( $previous !== false ) {
		ini_set( "allow_url_fopen", $previous );
	}
	return $ret;
}
/**
 * Fetch a page export from another wiki known by interwiki prefix.
 *
 * @param string $interwiki Prefix looked up with Title::getInterwikiLink().
 * @param string $page      Page name appended to "Special:Export/".
 * @return mixed WikiError when the prefix yields no URL pattern, otherwise
 *               the result of setupFromURL().
 */
function setupFromInterwiki( $interwiki, $page ) {
	$pattern = Title::getInterwikiLink( $interwiki );
	if( empty( $pattern ) ) {
		return new WikiError( 'Bad interwiki link' );
	}

	# The interwiki pattern carries a $1 placeholder for the page path.
	$exportPath = wfUrlencode( "Special:Export/$page" );
	$url = str_replace( "$1", $exportPath, $pattern );

	$this->notice( "Importing from $url" );
	return $this->setupFromURL( $url );
}
/**
 * Parse the loaded source and feed every revision to the revision handler.
 *
 * A UTF-8 expat parser is created with this object as callback target;
 * parsing starts in in_start(), and the callbacks re-register one another
 * as the document structure is walked.
 *
 * @return mixed WikiErrorMsg 'importnotext' when no source is loaded,
 *               WikiXmlError when the parser rejects the input, true when
 *               parsing ran to the end.
 */
function doImport() {
	if( empty( $this->mSource ) ) {
		return new WikiErrorMsg( "importnotext" );
	}

	$parser = xml_parser_create( "UTF-8" );

	# case folding violates XML standard, turn it off
	xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

	# Pass the object plainly: call-time pass-by-reference (an & written in
	# the argument list) is a fatal error from PHP 5.4 onwards.
	xml_set_object( $parser, $this );
	xml_set_element_handler( $parser, "in_start", "" );

	if( !xml_parse( $parser, $this->mSource, true ) ) {
		# NOTE(review): the parser is not freed on this path; the error
		# object receives it, so confirm WikiXmlError no longer needs the
		# parser after construction before adding xml_parser_free() here.
		return new WikiXmlError( $parser );
	}
	xml_parser_free( $parser );

	return true;
}
/**
 * Debug hook used throughout the parser callbacks.
 *
 * Deliberately silent: remove the comment marker below to route the
 * messages through notice().
 *
 * @param string $data Debug message.
 */
function debug( $data ) {
	#$this->notice( "DEBUG: $data\n" );
}
/**
 * Show a progress message to whoever is running the import.
 *
 * The visible web branch appends $data to the page output as a list item
 * without escaping it, so callers must pass text that is safe as HTML.
 *
 * NOTE(review): the command-line branch, the else line and the declaration
 * that brings $wgOut into scope are not visible in this excerpt.
 *
 * @param string $data Message text.
 */
304 function notice( $data ) {
305 global $wgCommandLineMode;
306 if( $wgCommandLineMode ) {
310 $wgOut->addHTML( "<li>$data</li>\n" );
/**
 * Choose the callback that out_revision() runs for each parsed revision.
 *
 * @param mixed $functionref A function name or an array( object, method )
 *                           pair, as accepted by call_user_func().
 */
function setRevisionHandler( $functionref ) {
	$this->mRevisionHandler = $functionref;
}
/**
 * Handler installed by the constructor: writes the fields of a parsed
 * revision to debug() and changes nothing.
 *
 * @param object $revision Revision whose title, user_text, timestamp,
 *                         comment and text properties are dumped.
 */
function defaultRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	# The title is only an object when the title text could be resolved.
	$titleLine = is_object( $revision->title )
		? "-- Title: " . $revision->title->getPrefixedText()
		: "-- Title: <invalid>";
	$this->debug( $titleLine );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
333 # XML parser callbacks from here out -- beware!
/**
 * No-op XML callback. out_mediawiki() registers it for both element events
 * and out_append() registers it for character data, so it accepts either
 * argument count.
 *
 * @param resource $parser Parser issuing the callback.
 * @param string   $x      Element name or character data; ignored.
 * @param mixed    $y      Attributes when called for a start tag; ignored.
 */
function donothing( $parser, $x, $y="" ) {
	#$this->debug( "donothing" );
}
/**
 * Start-tag callback for the document root.
 *
 * Accepts only <mediawiki> and then hands element events over to
 * in_mediawiki() / out_mediawiki(); anything else is reported through
 * throwXMLerror().
 */
function in_start( $parser, $name, $attribs ) {
	$this->debug( "in_start $name" );
	if( $name == "mediawiki" ) {
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
	} else {
		return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
	}
}
/**
 * Start-tag callback directly inside <mediawiki>.
 *
 * Only <page> is accepted; element events are then handled by in_page() /
 * out_page(). Other tags are reported through throwXMLerror().
 */
function in_mediawiki( $parser, $name, $attribs ) {
	$this->debug( "in_mediawiki $name" );
	if( $name == "page" ) {
		xml_set_element_handler( $parser, "in_page", "out_page" );
	} else {
		return $this->throwXMLerror( "Expected <page>, got <$name>" );
	}
}
/**
 * End-tag callback at the <mediawiki> level.
 *
 * On </mediawiki> every further element event is routed to donothing();
 * any other closing tag is reported through throwXMLerror().
 */
function out_mediawiki( $parser, $name ) {
	$this->debug( "out_mediawiki $name" );
	if( $name == "mediawiki" ) {
		xml_set_element_handler( $parser, "donothing", "donothing" );
	} else {
		return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
	}
}
/**
 * Start-tag callback inside <page>.
 *
 * Three outcomes are visible: begin collecting character data for a simple
 * field, begin a new revision, or report a tag that is not allowed here.
 *
 * NOTE(review): the switch statement and its case labels are not visible in
 * this excerpt, so which tag names select which branch cannot be read off
 * this listing.
 */
361 function in_page( $parser, $name, $attribs ) {
362 $this->debug( "in_page $name" );
# Simple field: remember the tag, clear the buffer, and record "page" so that
# out_append() can restore the in_page/out_page handlers afterwards.
367 $this->appendfield
= $name;
368 $this->appenddata
= "";
369 $this->parenttag
= "page";
370 xml_set_element_handler( $parser, "in_nothing", "out_append" );
371 xml_set_character_data_handler( $parser, "char_append" );
# New revision: it takes the title collected so far for this page.
374 $this->workRevision
= new WikiRevision
;
375 $this->workRevision
->setTitle( $this->workTitle
);
376 xml_set_element_handler( $parser, "in_revision", "out_revision" );
379 return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
/**
 * End-tag callback at the <page> level.
 *
 * On </page> control returns to the <mediawiki> handlers and the per-page
 * working state is cleared; any other closing tag is reported through
 * throwXMLerror().
 */
function out_page( $parser, $name ) {
	$this->debug( "out_page $name" );
	if( $name == "page" ) {
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		# Nothing from this page may leak into the next one.
		$this->workTitle = NULL;
		$this->workRevision = NULL;
	} else {
		return $this->throwXMLerror( "Expected </page>, got </$name>" );
	}
}
/**
 * Start-tag callback active while a text-only field is being collected;
 * any nested element is reported through throwXMLerror().
 */
function in_nothing( $parser, $name, $attribs ) {
	$this->debug( "in_nothing $name" );
	$complaint = "No child elements allowed here; got <$name>";
	return $this->throwXMLerror( $complaint );
}
/**
 * Character-data callback: adds each chunk delivered by the parser to the
 * buffer of the field currently being collected.
 */
function char_append( $parser, $data ) {
	$this->debug( "char_append '$data'" );
	$this->appenddata = $this->appenddata . $data;
}
/**
 * End-tag callback that finishes a text-only field.
 *
 * Checks that the closing tag matches the field being collected, restores
 * the element handlers of the enclosing level, stops character collection,
 * stores the buffered text in the working title or working revision, and
 * finally clears the field name and buffer.
 *
 * NOTE(review): the case labels of the switch are not visible in this
 * excerpt, so the tag name behind each assignment must be confirmed against
 * the full file.
 */
402 function out_append( $parser, $name ) {
403 $this->debug( "out_append $name" );
404 if( $name != $this->appendfield
) {
405 return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
# Handler names are derived from the parent tag recorded when collection began, e.g. in_page / out_page.
407 xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
408 xml_set_character_data_handler( $parser, "donothing" );
409 switch( $this->appendfield
) {
411 $this->workTitle
= $this->appenddata
;
414 $this->workRevision
->setText( $this->appenddata
);
417 $this->workRevision
->setUsername( $this->appenddata
);
420 $this->workRevision
->setUserIP( $this->appenddata
);
423 $this->workRevision
->setTimestamp( $this->appenddata
);
426 $this->workRevision
->setComment( $this->appenddata
);
# A field with no matching branch is only logged; its data is discarded by the reset below.
429 $this->debug( "Bad append: {$this->appendfield}" );
431 $this->appendfield
= "";
432 $this->appenddata
= "";
/**
 * Start-tag callback inside <revision>.
 *
 * Three outcomes are visible: begin collecting a text-only field, descend
 * into the contributor handlers, or report a tag that is not allowed here.
 *
 * NOTE(review): the switch statement and its case labels are not visible in
 * this excerpt.
 */
435 function in_revision( $parser, $name, $attribs ) {
436 $this->debug( "in_revision $name" );
# The buffer is not cleared here; out_append() leaves it empty after every field.
442 $this->parenttag
= "revision";
443 $this->appendfield
= $name;
444 xml_set_element_handler( $parser, "in_nothing", "out_append" );
445 xml_set_character_data_handler( $parser, "char_append" );
448 xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
451 return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
/**
 * End-tag callback at the <revision> level.
 *
 * On </revision> control returns to the <page> handlers and the finished
 * revision is passed to the configured revision handler. A non-empty value
 * returned by the handler is added to the page output as a list item.
 * Any other closing tag is reported through throwXMLerror().
 */
function out_revision( $parser, $name ) {
	$this->debug( "out_revision $name" );
	if( $name != "revision" ) {
		return $this->throwXMLerror( "Expected </revision>, got </$name>" );
	}
	xml_set_element_handler( $parser, "in_page", "out_page" );

	# The references live in the argument array: writing & directly in a
	# call's argument list (call-time pass-by-reference) is a fatal error
	# from PHP 5.4 onwards, while handlers declared with a by-reference
	# parameter still receive one this way.
	$handlerArgs = array( &$this->workRevision, &$this );
	$out = call_user_func_array( $this->mRevisionHandler, $handlerArgs );
	if( !empty( $out ) ) {
		global $wgOut;
		# The handler's return value is emitted as-is, so it must be HTML.
		$wgOut->addHTML( "<li>" . $out . "</li>\n" );
	}
}
/**
 * Start-tag callback inside <contributor>.
 *
 * Accepted tags start collection of a text-only field whose parent is
 * recorded as "contributor"; other tags are reported.
 *
 * NOTE(review): the switch statement and its case labels are not visible in
 * this excerpt.
 */
469 function in_contributor( $parser, $name, $attribs ) {
470 $this->debug( "in_contributor $name" );
474 $this->parenttag
= "contributor";
475 $this->appendfield
= $name;
476 xml_set_element_handler( $parser, "in_nothing", "out_append" );
477 xml_set_character_data_handler( $parser, "char_append" );
# Unlike the sibling callbacks, the result of throwXMLerror() is not returned here.
480 $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
/**
 * End-tag callback at the <contributor> level.
 *
 * On </contributor> element events go back to in_revision() /
 * out_revision(); any other closing tag is reported through
 * throwXMLerror().
 */
function out_contributor( $parser, $name ) {
	$this->debug( "out_contributor $name" );
	if( $name == "contributor" ) {
		xml_set_element_handler( $parser, "in_revision", "out_revision" );
	} else {
		return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
	}
}