includes/specials/SpecialExport.php

   1 <?php
   2 /**
   3  * Implements Special:Export
   4  *
   5  * Copyright © 2003-2008 Brion Vibber <brion@pobox.com>
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License along
  18  * with this program; if not, write to the Free Software Foundation, Inc.,
  19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20  * http://www.gnu.org/copyleft/gpl.html
  21  *
  22  * @file
  23  * @ingroup SpecialPage
  24  */
  25
  26 /**
  27  * A special page that allows users to export pages in a XML file
  28  *
  29  * @ingroup SpecialPage
  30  */
  31 class SpecialExport extends SpecialPage {
  32
  33         private $curonly, $doExport, $pageLinkDepth, $templates;
  34         private $images;
  35
  36         public function __construct() {
  37                 parent::__construct( 'Export' );
  38         }
  39
  40         public function execute( $par ) {
  41                 global $wgSitename, $wgExportAllowListContributors, $wgExportFromNamespaces;
  42                 global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
  43                 global $wgExportAllowAll;
  44
  45                 $this->setHeaders();
  46                 $this->outputHeader();
  47
  48                 // Set some variables
  49                 $this->curonly = true;
  50                 $this->doExport = false;
  51                 $request = $this->getRequest();
  52                 $this->templates = $request->getCheck( 'templates' );
  53                 $this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
  54                 $this->pageLinkDepth = $this->validateLinkDepth(
  55                         $request->getIntOrNull( 'pagelink-depth' )
  56                 );
  57                 $nsindex = '';
  58                 $exportall = false;
  59
  60                 if ( $request->getCheck( 'addcat' ) ) {
  61                         $page = $request->getText( 'pages' );
  62                         $catname = $request->getText( 'catname' );
  63
  64                         if ( $catname !== '' && $catname !== null && $catname !== false ) {
  65                                 $t = Title::makeTitleSafe( NS_MAIN, $catname );
  66                                 if ( $t ) {
  67                                         /**
  68                                          * @todo FIXME: This can lead to hitting memory limit for very large
  69                                          * categories. Ideally we would do the lookup synchronously
  70                                          * during the export in a single query.
  71                                          */
  72                                         $catpages = $this->getPagesFromCategory( $t );
  73                                         if ( $catpages ) {
  74                                                 $page .= "\n" . implode( "\n", $catpages );
  75                                         }
  76                                 }
  77                         }
  78                 }
  79                 elseif( $request->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
  80                         $page = $request->getText( 'pages' );
  81                         $nsindex = $request->getText( 'nsindex', '' );
  82
  83                         if ( strval( $nsindex ) !== ''  ) {
  84                                 /**
  85                                  * Same implementation as above, so same @todo
  86                                  */
  87                                 $nspages = $this->getPagesFromNamespace( $nsindex );
  88                                 if ( $nspages ) {
  89                                         $page .= "\n" . implode( "\n", $nspages );
  90                                 }
  91                         }
  92                 }
  93                 elseif( $request->getCheck( 'exportall' ) && $wgExportAllowAll ) {
  94                         $this->doExport = true;
  95                         $exportall = true;
  96                 }
  97                 elseif( $request->wasPosted() && $par == '' ) {
  98                         $page = $request->getText( 'pages' );
  99                         $this->curonly = $request->getCheck( 'curonly' );
 100                         $rawOffset = $request->getVal( 'offset' );
 101
 102                         if( $rawOffset ) {
 103                                 $offset = wfTimestamp( TS_MW, $rawOffset );
 104                         } else {
 105                                 $offset = null;
 106                         }
 107
 108                         $limit = $request->getInt( 'limit' );
 109                         $dir = $request->getVal( 'dir' );
 110                         $history = array(
 111                                 'dir' => 'asc',
 112                                 'offset' => false,
 113                                 'limit' => $wgExportMaxHistory,
 114                         );
 115                         $historyCheck = $request->getCheck( 'history' );
 116
 117                         if ( $this->curonly ) {
 118                                 $history = WikiExporter::CURRENT;
 119                         } elseif ( !$historyCheck ) {
 120                                 if ( $limit > 0 && ($wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
 121                                         $history['limit'] = $limit;
 122                                 }
 123                                 if ( !is_null( $offset ) ) {
 124                                         $history['offset'] = $offset;
 125                                 }
 126                                 if ( strtolower( $dir ) == 'desc' ) {
 127                                         $history['dir'] = 'desc';
 128                                 }
 129                         }
 130
 131                         if( $page != '' ) {
 132                                 $this->doExport = true;
 133                         }
 134                 } else {
 135                         // Default to current-only for GET requests.
 136                         $page = $request->getText( 'pages', $par );
 137                         $historyCheck = $request->getCheck( 'history' );
 138
 139                         if( $historyCheck ) {
 140                                 $history = WikiExporter::FULL;
 141                         } else {
 142                                 $history = WikiExporter::CURRENT;
 143                         }
 144
 145                         if( $page != '' ) {
 146                                 $this->doExport = true;
 147                         }
 148                 }
 149
 150                 if( !$wgExportAllowHistory ) {
 151                         // Override
 152                         $history = WikiExporter::CURRENT;
 153                 }
 154
 155                 $list_authors = $request->getCheck( 'listauthors' );
 156                 if ( !$this->curonly || !$wgExportAllowListContributors ) {
 157                         $list_authors = false ;
 158                 }
 159
 160                 if ( $this->doExport ) {
 161                         $this->getOutput()->disable();
 162
 163                         // Cancel output buffering and gzipping if set
 164                         // This should provide safer streaming for pages with history
 165                         wfResetOutputBuffers();
 166                         $request->response()->header( "Content-type: application/xml; charset=utf-8" );
 167
 168                         if( $request->getCheck( 'wpDownload' ) ) {
 169                                 // Provide a sane filename suggestion
 170                                 $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
 171                                 $request->response()->header( "Content-disposition: attachment;filename={$filename}" );
 172                         }
 173
 174                         $this->doExport( $page, $history, $list_authors, $exportall );
 175
 176                         return;
 177                 }
 178
 179                 $out = $this->getOutput();
 180                 $out->addWikiMsg( 'exporttext' );
 181
 182                 $form = Xml::openElement( 'form', array( 'method' => 'post',
 183                         'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
 184                 $form .= Xml::inputLabel( $this->msg( 'export-addcattext' )->text(), 'catname', 'catname', 40 ) . '&#160;';
 185                 $form .= Xml::submitButton( $this->msg( 'export-addcat' )->text(), array( 'name' => 'addcat' ) ) . '<br />';
 186
 187                 if ( $wgExportFromNamespaces ) {
 188                         $form .= Html::namespaceSelector(
 189                                 array(
 190                                         'selected' => $nsindex,
 191                                         'label' => $this->msg( 'export-addnstext' )->text()
 192                                 ), array(
 193                                         'name'  => 'nsindex',
 194                                         'id'    => 'namespace',
 195                                         'class' => 'namespaceselector',
 196                                 )
 197                         ) . '&#160;';
 198                         $form .= Xml::submitButton( $this->msg( 'export-addns' )->text(), array( 'name' => 'addns' ) ) . '<br />';
 199                 }
 200
 201                 if ( $wgExportAllowAll ) {
 202                         $form .= Xml::checkLabel(
 203                                 $this->msg( 'exportall' )->text(),
 204                                 'exportall',
 205                                 'exportall',
 206                                 $request->wasPosted() ? $request->getCheck( 'exportall' ) : false
 207                         ) . '<br />';
 208                 }
 209
 210                 $form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
 211                 $form .= '<br />';
 212
 213                 if( $wgExportAllowHistory ) {
 214                         $form .= Xml::checkLabel(
 215                                 $this->msg( 'exportcuronly' )->text(),
 216                                 'curonly',
 217                                 'curonly',
 218                                 $request->wasPosted() ? $request->getCheck( 'curonly' ) : true
 219                         ) . '<br />';
 220                 } else {
 221                         $out->addWikiMsg( 'exportnohistory' );
 222                 }
 223
 224                 $form .= Xml::checkLabel(
 225                         $this->msg( 'export-templates' )->text(),
 226                         'templates',
 227                         'wpExportTemplates',
 228                         $request->wasPosted() ? $request->getCheck( 'templates' ) : false
 229                 ) . '<br />';
 230
 231                 if( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
 232                         $form .= Xml::inputLabel( $this->msg( 'export-pagelinks' )->text(), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
 233                 }
 234                 // Enable this when we can do something useful exporting/importing image information. :)
 235                 //$form .= Xml::checkLabel( $this->msg( 'export-images' )->text(), 'images', 'wpExportImages', false ) . '<br />';
 236                 $form .= Xml::checkLabel(
 237                         $this->msg( 'export-download' )->text(),
 238                         'wpDownload',
 239                         'wpDownload',
 240                         $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
 241                 ) . '<br />';
 242
 243                 if ( $wgExportAllowListContributors ) {
 244                         $form .= Xml::checkLabel(
 245                                 $this->msg( 'exportlistauthors' )->text(),
 246                                 'listauthors',
 247                                 'listauthors',
 248                                 $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
 249                         ) . '<br />';
 250                 }
 251
 252                 $form .= Xml::submitButton( $this->msg( 'export-submit' )->text(), Linker::tooltipAndAccesskeyAttribs( 'export' ) );
 253                 $form .= Xml::closeElement( 'form' );
 254
 255                 $out->addHTML( $form );
 256         }
 257
 258         /**
 259          * @return bool
 260          */
 261         private function userCanOverrideExportDepth() {
 262                 return $this->getUser()->isAllowed( 'override-export-depth' );
 263         }
 264
 265         /**
 266          * Do the actual page exporting
 267          *
 268          * @param $page String: user input on what page(s) to export
 269          * @param $history Mixed: one of the WikiExporter history export constants
 270          * @param $list_authors Boolean: Whether to add distinct author list (when
 271          *                      not returning full history)
 272          * @param $exportall Boolean: Whether to export everything
 273          */
 274         private function doExport( $page, $history, $list_authors, $exportall ) {
 275
 276                 // If we are grabbing everything, enable full history and ignore the rest
 277                 if ( $exportall ) {
 278                         $history = WikiExporter::FULL;
 279                 } else {
 280
 281                         $pageSet = array(); // Inverted index of all pages to look up
 282
 283                         // Split up and normalize input
 284                         foreach( explode( "\n", $page ) as $pageName ) {
 285                                 $pageName = trim( $pageName );
 286                                 $title = Title::newFromText( $pageName );
 287                                 if( $title && $title->getInterwiki() == '' && $title->getText() !== '' ) {
 288                                         // Only record each page once!
 289                                         $pageSet[$title->getPrefixedText()] = true;
 290                                 }
 291                         }
 292
 293                         // Set of original pages to pass on to further manipulation...
 294                         $inputPages = array_keys( $pageSet );
 295
 296                         // Look up any linked pages if asked...
 297                         if( $this->templates ) {
 298                                 $pageSet = $this->getTemplates( $inputPages, $pageSet );
 299                         }
 300                         $linkDepth = $this->pageLinkDepth;
 301                         if( $linkDepth ) {
 302                                 $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
 303                         }
 304
 305                         /*
 306                          // Enable this when we can do something useful exporting/importing image information. :)
 307                          if( $this->images ) ) {
 308                          $pageSet = $this->getImages( $inputPages, $pageSet );
 309                          }
 310                         */
 311
 312                         $pages = array_keys( $pageSet );
 313
 314                         // Normalize titles to the same format and remove dupes, see bug 17374
 315                         foreach( $pages as $k => $v ) {
 316                                 $pages[$k] = str_replace( " ", "_", $v );
 317                         }
 318
 319                         $pages = array_unique( $pages );
 320                 }
 321
 322                 /* Ok, let's get to it... */
 323                 if( $history == WikiExporter::CURRENT ) {
 324                         $lb = false;
 325                         $db = wfGetDB( DB_SLAVE );
 326                         $buffer = WikiExporter::BUFFER;
 327                 } else {
 328                         // Use an unbuffered query; histories may be very long!
 329                         $lb = wfGetLBFactory()->newMainLB();
 330                         $db = $lb->getConnection( DB_SLAVE );
 331                         $buffer = WikiExporter::STREAM;
 332
 333                         // This might take a while... :D
 334                         wfSuppressWarnings();
 335                         set_time_limit(0);
 336                         wfRestoreWarnings();
 337                 }
 338
 339                 $exporter = new WikiExporter( $db, $history, $buffer );
 340                 $exporter->list_authors = $list_authors;
 341                 $exporter->openStream();
 342
 343                 if ( $exportall ) {
 344                         $exporter->allPages();
 345                 } else {
 346                         foreach( $pages as $page ) {
 347                         /*
 348                          if( $wgExportMaxHistory && !$this->curonly ) {
 349                          $title = Title::newFromText( $page );
 350                          if( $title ) {
 351                          $count = Revision::countByTitle( $db, $title );
 352                          if( $count > $wgExportMaxHistory ) {
 353                          wfDebug( __FUNCTION__ .
 354                          ": Skipped $page, $count revisions too big\n" );
 355                          continue;
 356                          }
 357                          }
 358                          }*/
 359                         #Bug 8824: Only export pages the user can read
 360                                 $title = Title::newFromText( $page );
 361                                 if( is_null( $title ) ) {
 362                                         continue; #TODO: perhaps output an <error> tag or something.
 363                                 }
 364                                 if( !$title->userCan( 'read', $this->getUser() ) ) {
 365                                         continue; #TODO: perhaps output an <error> tag or something.
 366                                 }
 367
 368                                 $exporter->pageByTitle( $title );
 369                         }
 370                 }
 371
 372                 $exporter->closeStream();
 373
 374                 if( $lb ) {
 375                         $lb->closeAll();
 376                 }
 377         }
 378
 379         /**
 380          * @param $title Title
 381          * @return array
 382          */
 383         private function getPagesFromCategory( $title ) {
 384                 global $wgContLang;
 385
 386                 $name = $title->getDBkey();
 387
 388                 $dbr = wfGetDB( DB_SLAVE );
 389                 $res = $dbr->select(
 390                         array( 'page', 'categorylinks' ),
 391                         array( 'page_namespace', 'page_title' ),
 392                         array( 'cl_from=page_id', 'cl_to' => $name ),
 393                         __METHOD__,
 394                         array( 'LIMIT' => '5000' )
 395                 );
 396
 397                 $pages = array();
 398
 399                 foreach ( $res as $row ) {
 400                         $n = $row->page_title;
 401                         if ($row->page_namespace) {
 402                                 $ns = $wgContLang->getNsText( $row->page_namespace );
 403                                 $n = $ns . ':' . $n;
 404                         }
 405
 406                         $pages[] = $n;
 407                 }
 408                 return $pages;
 409         }
 410
 411         /**
 412          * @param $nsindex int
 413          * @return array
 414          */
 415         private function getPagesFromNamespace( $nsindex ) {
 416                 global $wgContLang;
 417
 418                 $dbr = wfGetDB( DB_SLAVE );
 419                 $res = $dbr->select(
 420                         'page',
 421                         array( 'page_namespace', 'page_title' ),
 422                         array( 'page_namespace' => $nsindex ),
 423                         __METHOD__,
 424                         array( 'LIMIT' => '5000' )
 425                 );
 426
 427                 $pages = array();
 428
 429                 foreach ( $res as $row ) {
 430                         $n = $row->page_title;
 431
 432                         if ( $row->page_namespace ) {
 433                                 $ns = $wgContLang->getNsText( $row->page_namespace );
 434                                 $n = $ns . ':' . $n;
 435                         }
 436
 437                         $pages[] = $n;
 438                 }
 439                 return $pages;
 440         }
 441
 442         /**
 443          * Expand a list of pages to include templates used in those pages.
 444          * @param $inputPages array, list of titles to look up
 445          * @param $pageSet array, associative array indexed by titles for output
 446          * @return array associative array index by titles
 447          */
 448         private function getTemplates( $inputPages, $pageSet ) {
 449                 return $this->getLinks( $inputPages, $pageSet,
 450                         'templatelinks',
 451                         array( 'tl_namespace AS namespace', 'tl_title AS title' ),
 452                         array( 'page_id=tl_from' )
 453                 );
 454         }
 455
 456         /**
 457          * Validate link depth setting, if available.
 458          * @param $depth int
 459          * @return int
 460          */
 461         private function validateLinkDepth( $depth ) {
 462                 global $wgExportMaxLinkDepth;
 463
 464                 if( $depth < 0 ) {
 465                         return 0;
 466                 }
 467
 468                 if ( !$this->userCanOverrideExportDepth() ) {
 469                         if( $depth > $wgExportMaxLinkDepth ) {
 470                                 return $wgExportMaxLinkDepth;
 471                         }
 472                 }
 473
 474                 /*
 475                  * There's a HARD CODED limit of 5 levels of recursion here to prevent a
 476                  * crazy-big export from being done by someone setting the depth
 477                  * number too high. In other words, last resort safety net.
 478                  */
 479                 return intval( min( $depth, 5 ) );
 480         }
 481
 482         /**
 483          * Expand a list of pages to include pages linked to from that page.
 484          * @param $inputPages array
 485          * @param $pageSet array
 486          * @param $depth int
 487          * @return array
 488          */
 489         private function getPageLinks( $inputPages, $pageSet, $depth ) {
 490                 for( ; $depth > 0; --$depth ) {
 491                         $pageSet = $this->getLinks(
 492                                 $inputPages, $pageSet, 'pagelinks',
 493                                 array( 'pl_namespace AS namespace', 'pl_title AS title' ),
 494                                 array( 'page_id=pl_from' )
 495                         );
 496                         $inputPages = array_keys( $pageSet );
 497                 }
 498
 499                 return $pageSet;
 500         }
 501
 502         /**
 503          * Expand a list of pages to include images used in those pages.
 504          *
 505          * @param $inputPages array, list of titles to look up
 506          * @param $pageSet array, associative array indexed by titles for output
 507          *
 508          * @return array associative array index by titles
 509          */
 510         private function getImages( $inputPages, $pageSet ) {
 511                 return $this->getLinks(
 512                         $inputPages,
 513                         $pageSet,
 514                         'imagelinks',
 515                         array( NS_FILE . ' AS namespace', 'il_to AS title' ),
 516                         array( 'page_id=il_from' )
 517                 );
 518         }
 519
 520         /**
 521          * Expand a list of pages to include items used in those pages.
 522          * @return array
 523          */
 524         private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
 525                 $dbr = wfGetDB( DB_SLAVE );
 526
 527                 foreach( $inputPages as $page ) {
 528                         $title = Title::newFromText( $page );
 529
 530                         if( $title ) {
 531                                 $pageSet[$title->getPrefixedText()] = true;
 532                                 /// @todo FIXME: May or may not be more efficient to batch these
 533                                 ///        by namespace when given multiple input pages.
 534                                 $result = $dbr->select(
 535                                         array( 'page', $table ),
 536                                         $fields,
 537                                         array_merge(
 538                                                 $join,
 539                                                 array(
 540                                                         'page_namespace' => $title->getNamespace(),
 541                                                         'page_title' => $title->getDBkey()
 542                                                 )
 543                                         ),
 544                                         __METHOD__
 545                                 );
 546
 547                                 foreach( $result as $row ) {
 548                                         $template = Title::makeTitle( $row->namespace, $row->title );
 549                                         $pageSet[$template->getPrefixedText()] = true;
 550                                 }
 551                         }
 552                 }
 553
 554                 return $pageSet;
 555         }
 556
 557 }