3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
23 * @subpackage SpecialPage
# Entry point for Special:Import.
#
# On a POST with action=submit it picks an import source from the 'source'
# request value, runs a WikiImporter over it and reports the outcome through
# $wgOut ('importfailed' / 'importsuccess' messages, or the source's own
# error message). Afterwards it always emits the import forms: an XML upload
# form for users with the 'importupload' right, the 'importnosources' message
# when $wgImportSources is empty, and otherwise an interwiki form with one
# <option> per $wgImportSources entry.
#
# $page is accepted but not referenced in the lines visible here.
#
# NOTE(review): this extract is missing lines (case labels, else branches,
# closing braces and the statements that open the HTML strings), so the exact
# branch structure below should be confirmed against the full file.
29 function wfSpecialImport( $page = '' ) {
30 global $wgUser, $wgOut, $wgRequest, $wgTitle, $wgImportSources;
33 # $wgOut->addWikiText( "Special:Import is not ready for this beta release, sorry." );
# Only act on a submitted form; a plain GET falls through to the forms below.
37 if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
38 switch( $wgRequest->getVal( "source" ) ) {
# Upload source: requires the 'importupload' right, otherwise a
# permission-required page is returned.
40 if( $wgUser->isAllowed( 'importupload' ) ) {
41 $source = ImportStreamSource
::newFromUpload( "xmlimport" );
43 return $wgOut->permissionRequired( 'importupload' );
# Interwiki source: built from the posted 'interwiki' and 'frompage' values.
# NOTE(review): no check of 'interwiki' against $wgImportSources is visible
# here - confirm whether one is needed.
47 $source = ImportStreamSource
::newFromInterwiki(
48 $wgRequest->getVal( "interwiki" ),
49 $wgRequest->getText( "frompage" ) );
# Any other 'source' value becomes an error object (presumably the default
# branch of the switch - confirm).
52 $source = new WikiError( "Unknown import source type" );
# A source that is itself an error is reported instead of being imported.
55 if( WikiError
::isError( $source ) ) {
56 $wgOut->addWikiText( wfEscapeWikiText( $source->getMessage() ) );
58 $importer = new WikiImporter( $source );
59 $result = $importer->doImport();
60 if( WikiError
::isError( $result ) ) {
61 $wgOut->addWikiText( wfMsg( "importfailed",
62 wfEscapeWikiText( $result->getMessage() ) ) );
65 $wgOut->addWikiText( wfMsg( "importsuccess" ) );
# Both forms post back to this page with action=submit.
70 $action = $wgTitle->escapeLocalUrl( 'action=submit' );
72 if( $wgUser->isAllowed( 'importupload' ) ) {
73 $wgOut->addWikiText( wfMsg( "importtext" ) );
76 <legend>" . wfMsgHtml('upload') . "</legend>
77 <form enctype='multipart/form-data' method='post' action=\"$action\">
78 <input type='hidden' name='action' value='submit' />
79 <input type='hidden' name='source' value='upload' />
80 <input type='hidden' name='MAX_FILE_SIZE' value='2000000' />
81 <input type='file' name='xmlimport' value='' size='30' />
82 <input type='submit' value=\"" . wfMsgHtml( "uploadbtn" ) . "\" />
87 if( empty( $wgImportSources ) ) {
88 $wgOut->addWikiText( wfMsg( 'importnosources' ) );
92 if( !empty( $wgImportSources ) ) {
95 <legend>" . wfMsgHtml('importinterwiki') . "</legend>
96 <form method='post' action=\"$action\">
97 <input type='hidden' name='action' value='submit' />
98 <input type='hidden' name='source' value='interwiki' />
99 <select name='interwiki'>
101 foreach( $wgImportSources as $interwiki ) {
102 $iw = htmlspecialchars( $interwiki );
103 $wgOut->addHTML( "<option value=\"$iw\">$iw</option>\n" );
107 <input name='frompage' />
108 <input type='submit' />
118 * @subpackage SpecialPage
123 var $timestamp = "20010115000000";
/**
 * Sets the page title of this revision.
 *
 * The given text is handed to Title::newFromText() and whatever that
 * returns is stored in $this->title.
 *
 * @param $text Title text to convert
 */
function setTitle( $text ) {
	$parsed = Title::newFromText( $text );
	$this->title = $parsed;
}
# NOTE(review): only the signature of setID() is visible in this extract; the
# body line is missing. Presumably it stores $id on the object - confirm
# against the full file. It is called from out_append() with the collected
# element text.
134 function setID( $id ) {
/**
 * Sets the revision timestamp.
 *
 * The value is converted with wfTimestamp( TS_MW, ... ) before being
 * stored in $this->timestamp.
 *
 * @param $ts Timestamp text, e.g. 2003-08-05T18:30:02Z
 */
function setTimestamp( $ts ) {
	$converted = wfTimestamp( TS_MW, $ts );
	$this->timestamp = $converted;
}
/**
 * Records the contributor by user name.
 *
 * Writes to $this->user_text, the same field setUserIP() writes to.
 *
 * @param $user Contributor name
 */
function setUsername( $user ) {
	$contributor = $user;
	$this->user_text = $contributor;
}
/**
 * Records the contributor by IP address.
 *
 * Writes to $this->user_text, the same field setUsername() writes to.
 *
 * @param $ip Contributor address
 */
function setUserIP( $ip ) {
	$contributor = $ip;
	$this->user_text = $contributor;
}
# NOTE(review): only the signature of setText() is visible in this extract;
# the body line is missing. Presumably it stores $text on the object -
# confirm against the full file. It is called from out_append() with the
# collected element text.
151 function setText( $text ) {
/**
 * Sets the edit summary of this revision.
 *
 * @param $text Value stored unchanged in $this->comment
 */
function setComment( $text ) {
	$summary = $text;
	$this->comment = $summary;
}
/**
 * Sets the minor-edit flag of this revision.
 *
 * @param $minor Any value; it is reduced to a boolean before being stored
 *               in $this->minor
 */
function setMinor( $minor ) {
	$flag = $minor ? true : false;
	$this->minor = $flag;
}
# NOTE(review): only the signature of getTitle() is visible in this extract;
# the body is missing. Presumably it returns the value stored by setTitle() -
# confirm against the full file.
163 function getTitle() {
/**
 * Returns the revision timestamp.
 *
 * @return The current value of $this->timestamp
 */
function getTimestamp() {
	$stamp = $this->timestamp;
	return $stamp;
}
# NOTE(review): the function header for this return statement is missing from
# this extract. It presumably belongs to getUser(), which importOldRevision()
# calls - confirm against the full file. It returns the contributor text
# written by setUsername() / setUserIP().
176 return $this->user_text
;
/**
 * Returns the edit summary of this revision.
 *
 * @return The current value of $this->comment
 */
function getComment() {
	$summary = $this->comment;
	return $summary;
}
# NOTE(review): only the signature of getMinor() is visible in this extract;
# the body is missing. Presumably it returns the flag stored by setMinor() -
# confirm against the full file.
187 function getMinor() {
# Writes this revision into the local database.
#
# Visible steps: resolve the contributor to a local user, obtain the page ID
# for $this->title (inserting the page through Article::insertOn() where
# needed), build a Revision from the collected fields, insert it, and let
# Article::updateIfNewerOn() decide whether the page should point at it.
#
# WikiImporter::importRevision() runs this method through
# $dbw->deadlockLoop().
#
# NOTE(review): this extract is missing lines (conditionals, some array
# entries, closing braces and any return statement), so the branch structure
# and return value should be confirmed against the full file.
191 function importOldRevision() {
192 $fname = "WikiImporter::importOldRevision";
193 $dbw =& wfGetDB( DB_MASTER
);
195 # Sneak a single revision into place
# Look for a local account with the imported contributor name.
196 $user = User
::newFromName( $this->getUser() );
# Presumably the "account found" branch: use the local user's ID and name.
198 $userId = intval( $user->getId() );
199 $userText = $user->getName();
# Presumably the fallback branch: keep the imported contributor text as-is.
202 $userText = $this->getUser();
205 // avoid memory leak...?
# NOTE(review): what is done with $linkCache is on a line missing from this
# extract.
206 $linkCache =& LinkCache
::singleton();
209 $article = new Article( $this->title
);
210 $pageId = $article->getId();
212 # must create the page...
213 $pageId = $article->insertOn( $dbw );
216 # FIXME: Check for exact conflicts
217 # FIXME: Use original rev_id optionally
218 # FIXME: blah blah blah
220 #if( $numrows > 0 ) {
221 # return wfMsg( "importhistoryconflict" );
# Build the revision row from the fields collected during parsing.
225 $revision = new Revision( array(
227 'text' => $this->getText(),
228 'comment' => $this->getComment(),
230 'user_text' => $userText,
231 'timestamp' => $this->timestamp
,
232 'minor_edit' => $this->minor
,
234 $revId = $revision->insertOn( $dbw );
235 $article->updateIfNewerOn( $dbw, $revision );
245 * @subpackage SpecialPage
249 var $mPageCallback = null;
250 var $mRevisionCallback = null;
/**
 * Constructor.
 *
 * Installs importRevision() as the per-revision callback and remembers
 * the source that doImport() will read from.
 *
 * @param $source Object stored in $this->mSource; doImport() calls
 *                readChunk() and atEnd() on it
 */
function WikiImporter( $source ) {
	$defaultHandler = array( &$this, "importRevision" );
	$this->setRevisionCallback( $defaultHandler );
	$this->mSource = $source;
}
/**
 * Reports a structural problem found while walking the XML.
 *
 * The message goes to this importer's debug() hook and to wfDebug();
 * nothing is returned and parsing is not aborted here.
 *
 * @param $err Human-readable description of the problem
 */
function throwXmlError( $err ) {
	$localLine = "FAILURE: $err";
	$this->debug( $localLine );

	$logLine = "WikiImporter XML error: $err\n";
	wfDebug( $logLine );
}
# Runs the import: feeds $this->mSource to an XML parser chunk by chunk.
#
# Returns a WikiErrorMsg('importnotext') when no source is set and a
# WikiXmlError when xml_parse() fails on a chunk.
#
# NOTE(review): this extract is missing lines (the opening of the do-loop,
# closing braces and the final return), so the success return value should be
# confirmed against the full file.
265 function doImport() {
266 if( empty( $this->mSource
) ) {
267 return new WikiErrorMsg( "importnotext" );
270 $parser = xml_parser_create( "UTF-8" );
272 # case folding violates XML standard, turn it off
273 xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING
, false );
# Parser callbacks are methods on this object. Only a start-element handler
# is installed at first; in_start() swaps in the next pair of handlers.
275 xml_set_object( $parser, $this );
276 xml_set_element_handler( $parser, "in_start", "" );
278 $offset = 0; // for context extraction on error reporting
280 $chunk = $this->mSource
->readChunk();
# The third argument tells xml_parse() whether this is the final chunk.
281 if( !xml_parse( $parser, $chunk, $this->mSource
->atEnd() ) ) {
282 wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
# NOTE(review): this early return does not pass through the
# xml_parser_free() call at the end of the method.
283 return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset );
285 $offset +
= strlen( $chunk );
286 } while( $chunk !== false && !$this->mSource
->atEnd() );
287 xml_parser_free( $parser );
/**
 * Debug hook used by the parser callbacks.
 *
 * Currently a no-op: the wfDebug() call below is disabled.
 *
 * @param $data Message text
 */
function debug( $data ) {
	# Re-enable for verbose tracing of the import:
	#wfDebug( "IMPORT: $data\n" );
	return;
}
# Emits a progress message.
#
# The web path appends $data to $wgOut as an HTML list item.
#
# NOTE(review): lines are missing from this extract between the
# $wgCommandLineMode test and the addHTML() call (the command-line branch,
# the else, and presumably a `global $wgOut;` declaration) - confirm against
# the full file.
296 function notice( $data ) {
297 global $wgCommandLineMode;
298 if( $wgCommandLineMode ) {
# $data is interpolated into the markup as-is; callers are responsible for
# any escaping.
302 $wgOut->addHTML( "<li>$data</li>\n" );
307 * Sets the action to perform as each new page in the stream is reached.
308 * @param callable $callback
# Replaces the callback that pageCallback() invokes for each page title.
311 function setPageCallback( $callback ) {
# Keep the callback being replaced.
# NOTE(review): $previous is not used in the lines visible here; presumably
# it is returned on a line missing from this extract - confirm.
312 $previous = $this->mPageCallback
;
313 $this->mPageCallback
= $callback;
318 * Sets the action to perform as each page revision is reached.
319 * @param callable $callback
# Replaces the callback that out_revision() invokes for each finished
# revision. The constructor installs importRevision() through this method.
322 function setRevisionCallback( $callback ) {
# Keep the callback being replaced.
# NOTE(review): $previous is not used in the lines visible here; presumably
# it is returned on a line missing from this extract - confirm.
323 $previous = $this->mRevisionCallback
;
324 $this->mRevisionCallback
= $callback;
/**
 * Default per-revision callback, performs the import.
 *
 * Hands the revision's importOldRevision() method to the master
 * database's deadlockLoop(). Nothing is returned.
 *
 * @param WikiRevision $revision
 */
function importRevision( &$revision ) {
	$dbw =& wfGetDB( DB_MASTER );
	$job = array( &$revision, 'importOldRevision' );
	$dbw->deadlockLoop( $job );
}
/**
 * Alternate per-revision callback, for debugging.
 *
 * Sends the revision's title, user, timestamp, comment and text to
 * debug() instead of importing anything.
 *
 * @param WikiRevision $revision
 */
function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	# The title may not be an object, so it gets a placeholder line.
	$titleLine = is_object( $revision->title )
		? "-- Title: " . $revision->title->getPrefixedText()
		: "-- Title: <invalid>";
	$this->debug( $titleLine );

	$userLine = "-- User: " . $revision->user_text;
	$this->debug( $userLine );

	$timeLine = "-- Timestamp: " . $revision->timestamp;
	$this->debug( $timeLine );

	$commentLine = "-- Comment: " . $revision->comment;
	$this->debug( $commentLine );

	$textLine = "-- Text: " . $revision->text;
	$this->debug( $textLine );
}
/**
 * Notify the callback function when a new <page> is reached.
 *
 * Does nothing when no callable page callback is set.
 *
 * @param Title $title
 */
function pageCallback( $title ) {
	if( !is_callable( $this->mPageCallback ) ) {
		return;
	}
	call_user_func( $this->mPageCallback, $title );
}
368 # XML parser callbacks from here out -- beware!
/**
 * Parser callback that ignores whatever it is given.
 *
 * Installed by out_mediawiki() as both element handlers and by
 * out_append() as the character-data handler.
 *
 * @param $parser XML parser resource
 * @param $x      Ignored
 * @param $y      Ignored
 */
function donothing( $parser, $x, $y = "" ) {
	# Tracing is disabled:
	#$this->debug( "donothing" );
	return;
}
/**
 * Start-element handler for the document root.
 *
 * The root must be <mediawiki>. When it is, the in_mediawiki /
 * out_mediawiki handlers take over; otherwise the problem is reported
 * through throwXMLerror() and that call's result is returned.
 *
 * @param $parser  XML parser resource
 * @param $name    Element name
 * @param $attribs Element attributes (unused)
 */
function in_start( $parser, $name, $attribs ) {
	$this->debug( "in_start $name" );
	if( $name == "mediawiki" ) {
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
		return;
	}
	return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
}
/**
 * Start-element handler for direct children of <mediawiki>.
 *
 * <siteinfo> and <page> each switch to their own handler pair; any
 * other element is reported through throwXMLerror() and that call's
 * result is returned.
 *
 * @param $parser  XML parser resource
 * @param $name    Element name
 * @param $attribs Element attributes (unused)
 */
function in_mediawiki( $parser, $name, $attribs ) {
	$this->debug( "in_mediawiki $name" );
	switch( $name ) {
	case 'siteinfo':
		xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
		break;
	case 'page':
		xml_set_element_handler( $parser, "in_page", "out_page" );
		break;
	default:
		return $this->throwXMLerror( "Expected <page>, got <$name>" );
	}
}
/**
 * End-element handler for the document root.
 *
 * On </mediawiki> both element handlers are replaced with donothing();
 * any other closing tag is reported through throwXMLerror() and that
 * call's result is returned.
 *
 * @param $parser XML parser resource
 * @param $name   Element name
 */
function out_mediawiki( $parser, $name ) {
	$this->debug( "out_mediawiki $name" );
	if( $name == "mediawiki" ) {
		xml_set_element_handler( $parser, "donothing", "donothing" );
		return;
	}
	return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
}
# Start-element handler used while inside <siteinfo> (installed by
# in_mediawiki()).
#
# NOTE(review): the lines that decide which element names are accepted are
# missing from this extract; only the rejection path is visible. Confirm the
# accepted names against the full file.
400 function in_siteinfo( $parser, $name, $attribs ) {
402 $this->debug( "in_siteinfo $name" );
# Rejection path: report the element and return throwXMLerror()'s result.
412 return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
/**
 * End-element handler used while inside <siteinfo>.
 *
 * Closing </siteinfo> hands control back to the in_mediawiki /
 * out_mediawiki pair; every other closing tag is ignored.
 *
 * @param $parser XML parser resource
 * @param $name   Element name
 */
function out_siteinfo( $parser, $name ) {
	if( $name != "siteinfo" ) {
		return;
	}
	xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
}
# Start-element handler used while inside <page>.
#
# Two kinds of children are visible: simple text fields, whose character
# data is collected until the closing tag, and a child that starts a new
# WikiRevision. Anything else is rejected.
#
# NOTE(review): the switch/case lines naming the accepted elements are
# missing from this extract - confirm them against the full file.
423 function in_page( $parser, $name, $attribs ) {
424 $this->debug( "in_page $name" );
# Text-field branch: remember which field is being collected and that it
# sits under <page>, then collect text until out_append() sees the end tag.
429 $this->appendfield
= $name;
430 $this->appenddata
= "";
431 $this->parenttag
= "page";
432 xml_set_element_handler( $parser, "in_nothing", "out_append" );
433 xml_set_character_data_handler( $parser, "char_append" );
# Revision branch: start a fresh WikiRevision carrying the current page
# title and switch to the revision handlers.
436 $this->workRevision
= new WikiRevision
;
437 $this->workRevision
->setTitle( $this->workTitle
);
438 xml_set_element_handler( $parser, "in_revision", "out_revision" );
# Rejection path.
441 return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
/**
 * End-element handler used while inside <page>.
 *
 * On </page> control returns to the in_mediawiki / out_mediawiki pair
 * and the per-page working state is cleared. Any other closing tag is
 * reported through throwXMLerror() and that call's result is returned.
 *
 * @param $parser XML parser resource
 * @param $name   Element name
 */
function out_page( $parser, $name ) {
	$this->debug( "out_page $name" );
	if( $name == "page" ) {
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		# Drop the state belonging to the page that just ended.
		$this->workTitle = null;
		$this->workRevision = null;
		return;
	}
	return $this->throwXMLerror( "Expected </page>, got </$name>" );
}
/**
 * Start-element handler used while a text field is being collected.
 *
 * Such fields may not contain child elements, so every call is reported
 * through throwXMLerror() and that call's result is returned.
 *
 * @param $parser  XML parser resource
 * @param $name    Element name
 * @param $attribs Element attributes (unused)
 */
function in_nothing( $parser, $name, $attribs ) {
	$this->debug( "in_nothing $name" );
	$problem = "No child elements allowed here; got <$name>";
	return $this->throwXMLerror( $problem );
}
/**
 * Character-data handler used while a text field is being collected.
 *
 * Appends the incoming piece of text to $this->appenddata.
 *
 * @param $parser XML parser resource
 * @param $data   Piece of character data
 */
function char_append( $parser, $data ) {
	$this->debug( "char_append '$data'" );
	$this->appenddata = $this->appenddata . $data;
}
# End-element handler used while a text field is being collected.
#
# Checks that the closing tag matches the field that was opened, restores
# the parent element's handlers, stores the collected text in the working
# title or working revision, and finally clears the collection state.
#
# NOTE(review): the case labels of the switch are missing from this extract,
# so which field name leads to which statement below should be confirmed
# against the full file.
464 function out_append( $parser, $name ) {
465 $this->debug( "out_append $name" );
466 if( $name != $this->appendfield
) {
467 return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
# Hand control back to the parent's handlers; their names are derived from
# $this->parenttag ("page", "revision" or "contributor" in the visible code).
469 xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
470 xml_set_character_data_handler( $parser, "donothing" );
472 switch( $this->appendfield
) {
# The collected text becomes the working title and is announced to the page
# callback.
474 $this->workTitle
= $this->appenddata
;
475 $this->pageCallback( $this->workTitle
);
# The ID is stored only when the field sat directly under <revision>.
478 if ( $this->parenttag
== 'revision' ) {
479 $this->workRevision
->setID( $this->appenddata
);
483 $this->workRevision
->setText( $this->appenddata
);
486 $this->workRevision
->setUsername( $this->appenddata
);
489 $this->workRevision
->setUserIP( $this->appenddata
);
492 $this->workRevision
->setTimestamp( $this->appenddata
);
495 $this->workRevision
->setComment( $this->appenddata
);
# The flag is set from the element's presence; the collected text is not
# used for it.
498 $this->workRevision
->setMinor( true );
501 $this->debug( "Bad append: {$this->appendfield}" );
# Reset the collection state for the next field.
503 $this->appendfield
= "";
504 $this->appenddata
= "";
# Start-element handler used while inside <revision>.
#
# Visible branches: simple text fields are collected through char_append()
# / out_append(), one child switches to the contributor handlers, and
# anything else is rejected.
#
# NOTE(review): the switch/case lines naming the accepted elements are
# missing from this extract - confirm them against the full file.
507 function in_revision( $parser, $name, $attribs ) {
508 $this->debug( "in_revision $name" );
# Text-field branch.
# NOTE(review): unlike in_page(), no reset of $this->appenddata is visible
# here; out_append() does clear it after every field.
515 $this->parenttag
= "revision";
516 $this->appendfield
= $name;
517 xml_set_element_handler( $parser, "in_nothing", "out_append" );
518 xml_set_character_data_handler( $parser, "char_append" );
# Contributor branch.
521 xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
# Rejection path.
524 return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
# End-element handler used while inside <revision>.
#
# On </revision> control returns to the page handlers and the revision
# callback is invoked with the finished working revision and this importer.
# A non-empty callback result is written to $wgOut as a list item.
528 function out_revision( $parser, $name ) {
529 $this->debug( "out_revision $name" );
530 if( $name != "revision" ) {
531 return $this->throwXMLerror( "Expected </revision>, got </$name>" );
533 xml_set_element_handler( $parser, "in_page", "out_page" );
# Both arguments are passed by reference.
535 $out = call_user_func_array( $this->mRevisionCallback
,
536 array( &$this->workRevision
, &$this ) );
537 if( !empty( $out ) ) {
# NOTE(review): one line is missing from this extract just before this call,
# presumably `global $wgOut;` - confirm. $out is emitted without escaping.
539 $wgOut->addHTML( "<li>" . $out . "</li>\n" );
# Start-element handler used while inside <contributor>.
#
# Accepted children are collected as text fields through char_append() /
# out_append(); anything else is reported as an error.
#
# NOTE(review): the switch/case lines naming the accepted elements are
# missing from this extract - confirm them against the full file.
543 function in_contributor( $parser, $name, $attribs ) {
544 $this->debug( "in_contributor $name" );
549 $this->parenttag
= "contributor";
550 $this->appendfield
= $name;
551 xml_set_element_handler( $parser, "in_nothing", "out_append" );
552 xml_set_character_data_handler( $parser, "char_append" );
# NOTE(review): unlike the sibling handlers, the result of throwXMLerror()
# is not returned here.
555 $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
/**
 * End-element handler used while inside <contributor>.
 *
 * On </contributor> control returns to the in_revision / out_revision
 * pair; any other closing tag is reported through throwXMLerror() and
 * that call's result is returned.
 *
 * @param $parser XML parser resource
 * @param $name   Element name
 */
function out_contributor( $parser, $name ) {
	$this->debug( "out_contributor $name" );
	if( $name == "contributor" ) {
		xml_set_element_handler( $parser, "in_revision", "out_revision" );
		return;
	}
	return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
}
569 /** @package MediaWiki */
# Import source backed by a string held in memory.
#
# NOTE(review): this extract is missing lines of the class - the atEnd()
# method that readChunk() calls, and the statements between the atEnd()
# test and the final return in readChunk(). Confirm against the full file.
570 class ImportStringSource
{
# Keeps the string and marks it as not yet read.
571 function ImportStringSource( $string ) {
572 $this->mString
= $string;
573 $this->mRead
= false;
# Returns the stored string. What happens when atEnd() is already true, and
# where mRead is updated, is on lines not visible here.
580 function readChunk() {
581 if( $this->atEnd() ) {
585 return $this->mString
;
/**
 * Import source backed by an open stream.
 *
 * Instances expose the readChunk() / atEnd() pair that
 * WikiImporter::doImport() uses. The newFrom*() methods are factories that
 * are invoked as ImportStreamSource::newFrom...(); each returns either an
 * ImportStreamSource or a WikiError / WikiErrorMsg describing why no
 * source could be opened.
 *
 * @package MediaWiki
 */
class ImportStreamSource {
	/** Stream handle the import data is read from. */
	var $mHandle;

	/**
	 * @param $handle Open, readable stream handle
	 */
	function ImportStreamSource( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Result of feof() on the underlying stream
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * Reads the next piece of the stream.
	 *
	 * @return Result of fread() for up to 32768 bytes
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Opens a file (or, when URL wrappers are enabled, a URL) for reading.
	 *
	 * @param $filename Path or URL handed to fopen()
	 * @return ImportStreamSource on success, WikiError when fopen() fails
	 */
	function newFromFile( $filename ) {
		# Failures are reported through the return value, so fopen()'s own
		# warning is suppressed.
		$file = @fopen( $filename, 'rt' );
		if( !$file ) {
			return new WikiError( "Couldn't open import file" );
		}
		return new ImportStreamSource( $file );
	}

	/**
	 * Builds a source from an uploaded file.
	 *
	 * @param $fieldname Name of the file field in the submitted form
	 * @return ImportStreamSource on success; WikiErrorMsg 'importnofile'
	 *         when nothing usable was uploaded, or 'importuploaderror'
	 *         carrying PHP's upload error code
	 */
	function newFromUpload( $fieldname = "xmlimport" ) {
		# Test the superglobal directly: binding a reference to a missing
		# $_FILES entry would create that entry as a side effect.
		if( !isset( $_FILES[$fieldname] ) ) {
			return new WikiErrorMsg( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		# empty() also covers a missing 'name' key without raising a notice.
		if( empty( $upload['name'] ) ) {
			return new WikiErrorMsg( 'importnofile' );
		}
		if( !empty( $upload['error'] ) ) {
			return new WikiErrorMsg( 'importuploaderror', $upload['error'] );
		}

		# Only accept paths that PHP itself recorded as uploads.
		$fname = $upload['tmp_name'];
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		}
		return new WikiErrorMsg( 'importnofile' );
	}

	/**
	 * Builds a source that reads from a URL.
	 *
	 * @param $url Address to fetch
	 * @return ImportStreamSource on success, WikiError when the URL cannot
	 *         be opened
	 */
	function newFromURL( $url ) {
		# fopen-wrappers are normally turned off for security.
		# ini_set() returns the old value, or false when the setting could
		# not be changed.
		$previous = ini_set( "allow_url_fopen", true );
		$ret = ImportStreamSource::newFromFile( $url );

		# Put back whatever was configured before instead of forcing the
		# wrappers off; when the change was refused there is nothing to undo.
		if( $previous !== false ) {
			ini_set( "allow_url_fopen", $previous );
		}
		return $ret;
	}

	/**
	 * Builds a source that fetches Special:Export/<page> from another wiki.
	 *
	 * @param $interwiki Interwiki prefix, resolved with
	 *                   Title::getInterwikiLink()
	 * @param $page      Page name appended to "Special:Export/"
	 * @return ImportStreamSource on success, WikiError when the prefix does
	 *         not resolve or the URL cannot be opened
	 */
	function newFromInterwiki( $interwiki, $page ) {
		$base = Title::getInterwikiLink( $interwiki );
		if( empty( $base ) ) {
			return new WikiError( 'Bad interwiki link' );
		}

		# The interwiki base carries a literal "$1" placeholder for the
		# page name.
		$import = wfUrlencode( "Special:Export/$page" );
		$url = str_replace( "$1", $import, $base );
		return ImportStreamSource::newFromURL( $url );
	}
}