Merge "docs: Fix typo"
[mediawiki.git] / includes / page / MergeHistory.php
blobf6fbde8dd9dd1cb191caf3debe64e23d072d9c21
1 <?php
3 /**
4 * Copyright © 2015 Geoffrey Mon <geofbot@gmail.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
21 * @file
24 namespace MediaWiki\Page;
26 use InvalidArgumentException;
27 use ManualLogEntry;
28 use MediaWiki;
29 use MediaWiki\CommentStore\CommentStoreComment;
30 use MediaWiki\Content\Content;
31 use MediaWiki\Content\IContentHandlerFactory;
32 use MediaWiki\EditPage\SpamChecker;
33 use MediaWiki\HookContainer\HookContainer;
34 use MediaWiki\HookContainer\HookRunner;
35 use MediaWiki\Linker\LinkTargetLookup;
36 use MediaWiki\MainConfigNames;
37 use MediaWiki\MediaWikiServices;
38 use MediaWiki\Message\Message;
39 use MediaWiki\Permissions\Authority;
40 use MediaWiki\Permissions\PermissionStatus;
41 use MediaWiki\Revision\MutableRevisionRecord;
42 use MediaWiki\Revision\RevisionStore;
43 use MediaWiki\Revision\SlotRecord;
44 use MediaWiki\Status\Status;
45 use MediaWiki\Title\TitleFactory;
46 use MediaWiki\Title\TitleFormatter;
47 use MediaWiki\Title\TitleValue;
48 use MediaWiki\Utils\MWTimestamp;
49 use MediaWiki\Watchlist\WatchedItemStoreInterface;
50 use Wikimedia\Rdbms\IConnectionProvider;
51 use Wikimedia\Rdbms\IDatabase;
52 use Wikimedia\Timestamp\TimestampException;
54 /**
55 * Handles the backend logic of merging the histories of two
56 * pages.
58 * @since 1.27
60 class MergeHistory {
62 /** Maximum number of revisions that can be merged at once */
63 public const REVISION_LIMIT = 5000;
65 /** @var PageIdentity Page from which history will be merged */
66 protected $source;
68 /** @var PageIdentity Page to which history will be merged */
69 protected $dest;
71 /** @var IDatabase Database that we are using */
72 protected $dbw;
74 /** @var ?string Timestamp up to which history from the source will be merged */
75 private $timestamp;
77 /**
78 * @var MWTimestamp|false Maximum timestamp that we can use (oldest timestamp of dest).
79 * Use ::getMaxTimestamp to lazily initialize.
81 protected $maxTimestamp = false;
83 /**
84 * @var string|false|null SQL WHERE condition that selects source revisions
85 * to insert into destination. Use ::getTimeWhere to lazy-initialize.
87 protected $timeWhere = false;
89 /**
90 * @var MWTimestamp|false|null Timestamp up to which history from the source will be merged.
91 * Use getTimestampLimit to lazily initialize.
93 protected $timestampLimit = false;
95 /**
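96 * @var string|null Revision ID used together with the timestamp limit when selecting source revisions, or null if none is set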
98 private $revidLimit = null;
100 /** @var int Number of revisions merged (for Special:MergeHistory success message) */
101 protected $revisionsMerged;
103 private IContentHandlerFactory $contentHandlerFactory;
104 private RevisionStore $revisionStore;
105 private WatchedItemStoreInterface $watchedItemStore;
106 private SpamChecker $spamChecker;
107 private HookRunner $hookRunner;
108 private WikiPageFactory $wikiPageFactory;
109 private TitleFormatter $titleFormatter;
110 private TitleFactory $titleFactory;
111 private LinkTargetLookup $linkTargetLookup;
112 private DeletePageFactory $deletePageFactory;
/**
 * Prepare a history merge between two pages.
 *
 * Nothing is queried here; timestamp limits are computed lazily on first use.
 *
 * @param PageIdentity $source Page from which history will be merged
 * @param PageIdentity $dest Page to which history will be merged
 * @param ?string $timestamp Timestamp up to which history from the source will be merged
 * @param IConnectionProvider $dbProvider
 * @param IContentHandlerFactory $contentHandlerFactory
 * @param RevisionStore $revisionStore
 * @param WatchedItemStoreInterface $watchedItemStore
 * @param SpamChecker $spamChecker
 * @param HookContainer $hookContainer
 * @param WikiPageFactory $wikiPageFactory
 * @param TitleFormatter $titleFormatter
 * @param TitleFactory $titleFactory
 * @param LinkTargetLookup $linkTargetLookup
 * @param DeletePageFactory $deletePageFactory
 */
public function __construct(
	PageIdentity $source,
	PageIdentity $dest,
	?string $timestamp,
	IConnectionProvider $dbProvider,
	IContentHandlerFactory $contentHandlerFactory,
	RevisionStore $revisionStore,
	WatchedItemStoreInterface $watchedItemStore,
	SpamChecker $spamChecker,
	HookContainer $hookContainer,
	WikiPageFactory $wikiPageFactory,
	TitleFormatter $titleFormatter,
	TitleFactory $titleFactory,
	LinkTargetLookup $linkTargetLookup,
	DeletePageFactory $deletePageFactory
) {
	// What is being merged, and up to which point
	$this->source = $source;
	$this->dest = $dest;
	$this->timestamp = $timestamp;

	// Collaborating services
	$this->contentHandlerFactory = $contentHandlerFactory;
	$this->revisionStore = $revisionStore;
	$this->watchedItemStore = $watchedItemStore;
	$this->spamChecker = $spamChecker;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->titleFormatter = $titleFormatter;
	$this->titleFactory = $titleFactory;
	$this->linkTargetLookup = $linkTargetLookup;
	$this->deletePageFactory = $deletePageFactory;

	// Hooks are invoked through a runner wrapped around the container
	$this->hookRunner = new HookRunner( $hookContainer );

	// Every query issued by this class uses the primary database connection
	$this->dbw = $dbProvider->getPrimaryDatabase();
}
/**
 * Get the number of revisions that will be moved.
 *
 * The query is limited to self::REVISION_LIMIT + 1 rows, which is enough
 * for callers to tell whether the limit has been exceeded.
 *
 * @return int
 */
public function getRevisionCount() {
	// Source revisions that fall inside the merge window
	$conds = [
		'rev_page' => $this->source->getId(),
		$this->getTimeWhere(),
	];

	return $this->dbw->newSelectQueryBuilder()
		->select( '1' )
		->from( 'revision' )
		->where( $conds )
		->limit( self::REVISION_LIMIT + 1 )
		->caller( __METHOD__ )
		->fetchRowCount();
}
/**
 * Get the number of revisions that were moved.
 * Used in the SpecialMergeHistory success message.
 *
 * The counter is only assigned by merge(); until a merge has updated
 * revisions it is unset, in which case 0 is reported rather than null.
 *
 * @return int
 */
public function getMergedRevisionCount() {
	return $this->revisionsMerged ?? 0;
}
/**
 * Shared permission check behind probablyCanMerge() and authorizeMerge().
 *
 * Runs the given authorizer for the 'edit' action on both pages, checks the
 * reason against the spam checker, and requires the 'mergehistory' right.
 * All failures are collected in the returned status.
 *
 * @param callable $authorizer ( string $action, PageIdentity $target, PermissionStatus $status )
 * @param Authority $performer
 * @param string|null $reason Merge reason; null is treated as an empty summary. The public
 *  callers accept a nullable reason and forward it unchanged, so null must be accepted here.
 * @return PermissionStatus
 */
private function authorizeInternal(
	callable $authorizer,
	Authority $performer,
	?string $reason
): PermissionStatus {
	$status = PermissionStatus::newEmpty();

	$authorizer( 'edit', $this->source, $status );
	$authorizer( 'edit', $this->dest, $status );

	// Anti-spam
	if ( $this->spamChecker->checkSummary( $reason ?? '' ) !== false ) {
		// This is kind of lame, won't display nice
		$status->fatal( 'spamprotectiontext' );
	}

	// Check mergehistory permission
	if ( !$performer->isAllowed( 'mergehistory' ) ) {
		// User doesn't have the right to merge histories
		$status->fatal( 'mergehistory-fail-permission' );
	}

	return $status;
}
/**
 * Check whether $performer can execute the merge.
 *
 * @note this method does not guarantee full permissions check, so it should
 * only be used to decide whether to show a merge form. To authorize the merge
 * action use {@link self::authorizeMerge} instead.
 *
 * @param Authority $performer
 * @param string|null $reason
 * @return PermissionStatus
 */
public function probablyCanMerge( Authority $performer, ?string $reason = null ): PermissionStatus {
	// Per-page check delegated to Authority::probablyCan()
	$authorizer = static fn (
		string $action,
		PageIdentity $target,
		PermissionStatus $status
	) => $performer->probablyCan( $action, $target, $status );

	return $this->authorizeInternal( $authorizer, $performer, $reason );
}
/**
 * Authorize the merge by $performer.
 *
 * @note this method should be used right before the actual merge is performed.
 * To check whether a current performer has the potential to merge the history,
 * use {@link self::probablyCanMerge} instead.
 *
 * @param Authority $performer
 * @param string|null $reason
 * @return PermissionStatus
 */
public function authorizeMerge( Authority $performer, ?string $reason = null ): PermissionStatus {
	// Per-page check delegated to Authority::authorizeWrite()
	$authorizer = static fn (
		string $action,
		PageIdentity $target,
		PermissionStatus $status
	) => $performer->authorizeWrite( $action, $target, $status );

	return $this->authorizeInternal( $authorizer, $performer, $reason );
}
/**
 * Does various checks that the merge is valid. Only things based on the
 * two pages should be checked here.
 *
 * Every failed check adds its own fatal message; checking does not stop at
 * the first failure.
 *
 * @return Status
 */
public function isValidMerge() {
	$status = new Status();

	// Revisions cannot be reliably selected for a page whose article ID is 0
	if ( $this->source->getId() === 0 ) {
		$status->fatal( 'mergehistory-fail-invalid-source' );
	}
	if ( $this->dest->getId() === 0 ) {
		$status->fatal( 'mergehistory-fail-invalid-dest' );
	}

	// A page cannot be merged into itself
	if ( $this->source->isSamePageAs( $this->dest ) ) {
		$status->fatal( 'mergehistory-fail-self-merge' );
	}

	// The limit is computed once and cached, so it can be kept in a local
	$limit = $this->getTimestampLimit();

	// No limit means the timestamp could not be resolved
	if ( $limit === null ) {
		$status->fatal( 'mergehistory-fail-bad-timestamp' );
	}

	// The timestamp limit must not be newer than the oldest destination revision
	if ( $limit > $this->getMaxTimestamp() ) {
		$status->fatal( 'mergehistory-fail-timestamps-overlap' );
	}

	// Check that there are not too many revisions to move
	if ( $limit && $this->getRevisionCount() > self::REVISION_LIMIT ) {
		$status->fatal( 'mergehistory-fail-toobig', Message::numParam( self::REVISION_LIMIT ) );
	}

	return $status;
}
/**
 * Actually attempt the history move.
 *
 * The revision update, the source page clean-up, the watchlist duplication,
 * the log entry and the ArticleMergeComplete hook all happen inside one
 * atomic section on the primary database.
 *
 * @todo if all versions of page A are moved to B and then a user
 * tries to do a reverse-merge via the "unmerge" log link, then page
 * A will still be a redirect (as it was after the original merge),
 * though it will have the old revisions back from before (as expected).
 * The user may have to "undo" the redirect manually to finish the "unmerge".
 * Maybe this should delete redirects at the source page of merges?
 *
 * @param Authority $performer
 * @param string $reason
 * @return Status status of the history merge
 */
public function merge( Authority $performer, $reason = '' ) {
	// Validity comes first so that null pages are rejected before permissions are checked
	$validity = $this->isValidMerge();
	if ( !$validity->isOK() ) {
		return $validity;
	}
	$permission = $this->authorizeMerge( $performer, $reason );
	if ( !$permission->isOK() ) {
		return Status::wrap( $permission );
	}

	$status = new Status();
	$this->dbw->startAtomic( __METHOD__ );

	// Reassign the selected source revisions to the destination page
	$this->dbw->newUpdateQueryBuilder()
		->update( 'revision' )
		->set( [ 'rev_page' => $this->dest->getId() ] )
		->where( [ 'rev_page' => $this->source->getId(), $this->getTimeWhere() ] )
		->caller( __METHOD__ )
		->execute();

	// If no row changed there is nothing left to do
	$this->revisionsMerged = $this->dbw->affectedRows();
	if ( $this->revisionsMerged < 1 ) {
		$this->dbw->endAtomic( __METHOD__ );
		return $status->fatal( 'mergehistory-fail-no-change' );
	}

	// Revisions still attached to the source page (selected FOR UPDATE)
	$remaining = $this->dbw->newSelectQueryBuilder()
		->from( 'revision' )
		->where( [ 'rev_page' => $this->source->getId() ] )
		->forUpdate()
		->caller( __METHOD__ )
		->fetchRowCount();

	$sourceTitle = $this->titleFactory->newFromPageIdentity( $this->source );
	$destTitle = $this->titleFactory->newFromPageIdentity( $this->dest );

	// Update source page, histories and invalidate caches
	if ( $remaining ) {
		// The source keeps some of its history, so only its cache is invalidated
		$sourceTitle->invalidateCache();
	} else {
		// The source was emptied: give it a new revision with a summary
		// that mentions the reason when one was supplied
		$sourceText = $this->titleFormatter->getPrefixedText( $this->source );
		$destText = $this->titleFormatter->getPrefixedText( $this->dest );
		$summaryMsg = $reason
			? wfMessage( 'mergehistory-comment', $sourceText, $destText, $reason )
			: wfMessage( 'mergehistory-autocomment', $sourceText, $destText );

		$this->updateSourcePage(
			$status,
			$performer,
			$summaryMsg->inContentLanguage()->text()
		);
	}
	$destTitle->invalidateCache();

	// Duplicate watchers of the old article to the new article
	$this->watchedItemStore->duplicateAllAssociatedEntries( $this->source, $this->dest );

	// Record the merge in the log
	$logParams = [
		'4::dest' => $this->titleFormatter->getPrefixedText( $this->dest ),
		'5::mergepoint' => $this->getTimestampLimit()->getTimestamp( TS_MW ),
		'6::mergerevid' => $this->revidLimit
	];
	$logEntry = new ManualLogEntry( 'merge', 'merge' );
	$logEntry->setPerformer( $performer->getUser() );
	$logEntry->setComment( $reason );
	$logEntry->setTarget( $this->source );
	$logEntry->setParameters( $logParams );
	$logId = $logEntry->insert();
	$logEntry->publish( $logId );

	$this->hookRunner->onArticleMergeComplete( $sourceTitle, $destTitle );

	$this->dbw->endAtomic( __METHOD__ );

	return $status;
}
/**
 * Do various cleanup work and updates to the source page. This method
 * will only be called if no revision is remaining on the page.
 *
 * At the end, there would be either a redirect page or a deleted page,
 * depending on whether the content model of the page supports redirects or not.
 *
 * @param Status $status Status of the merge; the deletion outcome is merged into it
 * @param Authority $performer
 * @param string $revisionComment Edit summary for the redirect or empty revision
 *   to be created in place of the source page
 */
private function updateSourcePage( $status, $performer, $revisionComment ): void {
	$sourceTitle = $this->titleFactory->newFromPageIdentity( $this->source );
	$destTitle = $this->titleFactory->newFromPageIdentity( $this->dest );
	$sourceModel = $sourceTitle->getContentModel();
	$handler = $this->contentHandlerFactory->getContentHandler( $sourceModel );

	// The source is deleted instead of redirected when its content model
	// cannot hold a redirect...
	$deleteSource = !$handler->supportsRedirects();
	if ( !$deleteSource ) {
		// ...or when it is a wikitext message override (T376399).
		// Maybe one day they will have a custom content model and this
		// special case won't be needed.
		$deleteSource = $sourceTitle->getNamespace() === NS_MEDIAWIKI
			&& $sourceTitle->getContentModel() === CONTENT_MODEL_WIKITEXT;
	}

	if ( $deleteSource ) {
		$newContent = $handler->makeEmptyContent();
	} else {
		$redirectText = wfMessage( 'mergehistory-redirect-text' )->inContentLanguage()->plain();
		$newContent = $handler->makeRedirectContent( $destTitle, $redirectText );
	}

	if ( !$newContent instanceof Content ) {
		// Handler supports redirect but cannot create new redirect content?
		// Not possible to proceed without Content.
		//
		// @todo. Remove this once there's no evidence it's happening or if it's
		// determined all violating handlers have been fixed.
		// This is mostly kept because previous code was also blindly checking
		// the existence of the Content for both content models that support
		// redirects and those that don't, so it's hard to know what it was masking.
		$logger = MediaWiki\Logger\LoggerFactory::getInstance( 'ContentHandler' );
		$logger->warning(
			'ContentHandler for {model} says it supports redirects but failed '
			. 'to return Content object from ContentHandler::makeRedirectContent().'
			. ' {value} returned instead.',
			[
				'value' => get_debug_type( $newContent ),
				'model' => $sourceModel
			]
		);

		throw new InvalidArgumentException(
			"ContentHandler for '$sourceModel' supports redirects" .
			' but cannot create redirect content during History merge.'
		);
	}

	// T263340/T93469: Create revision record to also serve as the page revision.
	// This revision will be used to create page content. If the source page's
	// content model supports redirects, then it will be the redirect content.
	// If the content model does not support redirects, this content will aid
	// proper deletion of the page below.
	$revRecord = new MutableRevisionRecord( $this->source );
	$revRecord->setContent( SlotRecord::MAIN, $newContent )
		->setPageId( $this->source->getId() )
		->setComment( CommentStoreComment::newUnsavedComment( $revisionComment ) )
		->setUser( $performer->getUser() )
		->setTimestamp( wfTimestampNow() );

	$savedRevRecord = $this->revisionStore->insertRevisionOn( $revRecord, $this->dbw );

	$newPage = $this->wikiPageFactory->newFromTitle( $this->source );
	$newPage->updateRevisionOn( $this->dbw, $savedRevRecord );

	if ( $deleteSource ) {
		// T263340/T93469: Delete the source page to prevent errors because its
		// revisions are now tied to a different title and its content model
		// does not support redirects, so we cannot leave a new revision on it.
		// This deletion does not depend on user rights but may still fail. If it
		// fails, it will be communicated in the status response.
		$deleteReason = wfMessage( 'mergehistory-source-deleted-reason' )->inContentLanguage()->plain();
		$delPage = $this->deletePageFactory->newDeletePage( $newPage, $performer );
		$deletionStatus = $delPage->deleteUnsafe( $deleteReason );
		if ( $deletionStatus->isGood() && $delPage->deletionsWereScheduled()[DeletePage::PAGE_BASE] ) {
			$deletionStatus->warning(
				'delete-scheduled',
				wfEscapeWikiText( $newPage->getTitle()->getPrefixedText() )
			);
		}
		// Tell callers that the deletion path was taken for the source page.
		$status->value = 'source-deleted';
		$status->merge( $deletionStatus );
		return;
	}

	// TODO: This doesn't belong here, it should be part of PageLinksTable.
	// We have created a redirect page so let's
	// record the link from the page to the new title.
	// It should have no other outgoing links...
	$this->dbw->newDeleteQueryBuilder()
		->deleteFrom( 'pagelinks' )
		->where( [ 'pl_from' => $this->source->getId() ] )
		->caller( __METHOD__ )
		->execute();

	$migrationStage = MediaWikiServices::getInstance()->getMainConfig()->get(
		MainConfigNames::PageLinksSchemaMigrationStage
	);

	$linkRow = [
		'pl_from' => $this->source->getId(),
		'pl_from_namespace' => $this->source->getNamespace(),
	];
	// Old schema: the target is stored as namespace + title
	if ( $migrationStage & SCHEMA_COMPAT_WRITE_OLD ) {
		$linkRow['pl_namespace'] = $this->dest->getNamespace();
		$linkRow['pl_title'] = $this->dest->getDBkey();
	}
	// New schema: the target is stored as a link target ID
	if ( $migrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
		$linkRow['pl_target_id'] = $this->linkTargetLookup->acquireLinkTargetId(
			new TitleValue( $this->dest->getNamespace(), $this->dest->getDBkey() ),
			$this->dbw
		);
	}

	$this->dbw->newInsertQueryBuilder()
		->insertInto( 'pagelinks' )
		->row( $linkRow )
		->caller( __METHOD__ )
		->execute();
}
/**
 * Get the maximum timestamp that we can use (oldest timestamp of dest).
 *
 * The value is computed on first use by initTimestampLimits().
 */
private function getMaxTimestamp(): MWTimestamp {
	if ( $this->maxTimestamp !== false ) {
		return $this->maxTimestamp;
	}

	$this->initTimestampLimits();

	return $this->maxTimestamp;
}
/**
 * Get the timestamp up to which history from the source will be merged,
 * or null if something went wrong.
 *
 * The value is computed on first use by initTimestampLimits().
 */
private function getTimestampLimit(): ?MWTimestamp {
	if ( $this->timestampLimit !== false ) {
		return $this->timestampLimit;
	}

	$this->initTimestampLimits();

	return $this->timestampLimit;
}
/**
 * Get the SQL WHERE condition that selects source revisions to insert into destination,
 * or null if something went wrong.
 *
 * The value is computed on first use by initTimestampLimits().
 */
private function getTimeWhere(): ?string {
	if ( $this->timeWhere !== false ) {
		return $this->timeWhere;
	}

	$this->initTimestampLimits();

	return $this->timeWhere;
}
/**
 * Lazily initializes timestamp (and possibly revid) limits and conditions.
 *
 * Sets $maxTimestamp, $timestampLimit, $timeWhere and $revidLimit. If a
 * timestamp cannot be parsed, $timestampLimit and $timeWhere are set to
 * null so that isValidMerge() can report the problem.
 */
private function initTimestampLimits() {
	// The oldest destination revision is the upper bound for merged revisions
	$oldestDestTimestamp = $this->dbw->newSelectQueryBuilder()
		->select( 'MIN(rev_timestamp)' )
		->from( 'revision' )
		->where( [ 'rev_page' => $this->dest->getId() ] )
		->caller( __METHOD__ )
		->fetchField();
	$this->maxTimestamp = new MWTimestamp( $oldestDestTimestamp );
	$this->revidLimit = null;

	// Work out the timestamp pivot condition
	try {
		if ( $this->timestamp ) {
			// The requested value is either "<timestamp>" or "<timestamp>|<revid>"
			$pieces = explode( '|', $this->timestamp );
			if ( count( $pieces ) === 2 ) {
				$requested = $pieces[0];
				$this->revidLimit = $pieces[1];
			} else {
				$requested = $this->timestamp;
			}

			// With a requested timestamp, the latest source revision up to
			// that point becomes the insertion point
			$requestedTs = new MWTimestamp( $requested );
			$latestMatching = $this->dbw->newSelectQueryBuilder()
				->select( 'MAX(rev_timestamp)' )
				->from( 'revision' )
				->where( [
					$this->dbw->expr( 'rev_timestamp', '<=', $this->dbw->timestamp( $requestedTs ) ),
					'rev_page' => $this->source->getId()
				] )
				->caller( __METHOD__ )
				->fetchField();
			// NOTE(review): when no source revision matches, MAX() yields no value;
			// how MWTimestamp treats such input is not visible here - confirm.
			$pivot = new MWTimestamp( $latestMatching );
			$this->timestampLimit = $pivot;
		} else {
			// Without one, merge the entire source page history into the
			// beginning of the destination page history

			// Look up the latest revision of the source
			$latestRow = $this->dbw->newSelectQueryBuilder()
				->select( [ 'rev_timestamp', 'rev_id' ] )
				->from( 'page' )
				->join( 'revision', null, 'page_latest = rev_id' )
				->where( [ 'page_id' => $this->source->getId() ] )
				->caller( __METHOD__ )
				->fetchRow();
			$pivot = $this->maxTimestamp;
			if ( !$latestRow ) {
				$this->timestampLimit = null;
			} else {
				$this->timestampLimit = new MWTimestamp( $latestRow->rev_timestamp );
				$this->revidLimit = $latestRow->rev_id;
			}
		}

		// Compare on the timestamp, plus the revision ID when one is known
		$bounds = [ 'rev_timestamp' => $this->dbw->timestamp( $pivot ) ];
		if ( $this->revidLimit ) {
			$bounds['rev_id'] = $this->revidLimit;
		}
		$this->timeWhere = $this->dbw->buildComparison( '<=', $bounds );
	} catch ( TimestampException $ex ) {
		// The timestamp we got is screwed up and merge cannot continue
		// This should be detected by $this->isValidMerge()
		$this->timestampLimit = null;
		$this->timeWhere = null;
	}
}
639 /** @deprecated class alias since 1.40 */
640 class_alias( MergeHistory::class, 'MergeHistory' );