Wrap libxml_disable_entity_loader() calls in version constraint
[mediawiki.git] / includes / MergeHistory.php
blob4c960b3a85da9d50e23cd0b1ed28f93181730476
1 <?php
3 /**
4 * Copyright © 2015 Geoffrey Mon <geofbot@gmail.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
21 * @file
24 use MediaWiki\Content\IContentHandlerFactory;
25 use MediaWiki\EditPage\SpamChecker;
26 use MediaWiki\HookContainer\HookContainer;
27 use MediaWiki\HookContainer\HookRunner;
28 use MediaWiki\MediaWikiServices;
29 use MediaWiki\Page\WikiPageFactory;
30 use MediaWiki\Permissions\PermissionManager;
31 use MediaWiki\Revision\MutableRevisionRecord;
32 use MediaWiki\Revision\RevisionStore;
33 use MediaWiki\Revision\SlotRecord;
34 use Wikimedia\Rdbms\IDatabase;
35 use Wikimedia\Rdbms\ILoadBalancer;
36 use Wikimedia\Timestamp\TimestampException;
38 /**
39 * Handles the backend logic of merging the histories of two
40 * pages.
42 * @since 1.27
44 class MergeHistory {
46 /** Maximum number of revisions that can be merged at once */
47 public const REVISION_LIMIT = 5000;
49 /** @var Title Page from which history will be merged */
50 protected $source;
52 /** @var Title Page to which history will be merged */
53 protected $dest;
55 /** @var IDatabase Database that we are using (connection obtained from the load balancer in the constructor) */
56 protected $dbw;
58 /** @var MWTimestamp Maximum timestamp that we can use (oldest timestamp of dest) */
59 protected $maxTimestamp;
61 /** @var string SQL WHERE condition that selects source revisions to insert into destination; left unset when the constructor catches a TimestampException */
62 protected $timeWhere;
64 /** @var MWTimestamp|bool Timestamp up to which history from the source will be merged; false when the constructor catches a TimestampException */
65 protected $timestampLimit;
67 /** @var int Number of revisions merged (for Special:MergeHistory success message); assigned by merge() */
68 protected $revisionsMerged;
70 /** @var PermissionManager Used by checkPermissions() for the 'edit' and 'mergehistory' checks */
71 private $permManager;
73 /** @var IContentHandlerFactory Used to look up the content handler of the source page's content model */
74 private $contentHandlerFactory;
76 /** @var RevisionStore Used to insert the replacement revision on the source page */
77 private $revisionStore;
79 /** @var WatchedItemStoreInterface Used to duplicate watchlist entries from the source to the destination */
80 private $watchedItemStore;
82 /** @var SpamChecker Used to check the merge reason (summary) */
83 private $spamChecker;
85 /** @var HookRunner Used to run the ArticleMergeComplete hook */
86 private $hookRunner;
88 /** @var WikiPageFactory Used to obtain the WikiPage object of the source title */
89 private $wikiPageFactory;
/**
 * Set up a history merge from $source into $dest and work out the timestamp
 * boundaries that the merge will use.
 *
 * Since 1.35 dependencies are injected and not providing them is hard deprecated; use the
 * MergeHistoryFactory service
 *
 * @param Title $source Page from which history will be merged
 * @param Title $dest Page to which history will be merged
 * @param string|bool $timestamp Timestamp up to which history from the source will be merged
 * @param ILoadBalancer|null $loadBalancer
 * @param PermissionManager|null $permManager
 * @param IContentHandlerFactory|null $contentHandlerFactory
 * @param RevisionStore|null $revisionStore
 * @param WatchedItemStoreInterface|null $watchedItemStore
 * @param SpamChecker|null $spamChecker
 * @param HookContainer|null $hookContainer
 * @param WikiPageFactory|null $wikiPageFactory
 */
public function __construct(
	Title $source,
	Title $dest,
	$timestamp = false,
	ILoadBalancer $loadBalancer = null,
	PermissionManager $permManager = null,
	IContentHandlerFactory $contentHandlerFactory = null,
	RevisionStore $revisionStore = null,
	WatchedItemStoreInterface $watchedItemStore = null,
	SpamChecker $spamChecker = null,
	HookContainer $hookContainer = null,
	WikiPageFactory $wikiPageFactory = null
) {
	if ( $loadBalancer === null ) {
		// Legacy construction without injected services: warn, then pull every
		// dependency from the global service container (any other arguments
		// that were passed are overwritten).
		$deprecationNotice = 'Direct construction of ' . __CLASS__ .
			' was deprecated in MediaWiki 1.35';
		wfDeprecatedMsg( $deprecationNotice, '1.35' );
		$container = MediaWikiServices::getInstance();

		$loadBalancer = $container->getDBLoadBalancer();
		$permManager = $container->getPermissionManager();
		$contentHandlerFactory = $container->getContentHandlerFactory();
		$revisionStore = $container->getRevisionStore();
		$watchedItemStore = $container->getWatchedItemStore();
		$spamChecker = $container->getSpamChecker();
		$hookContainer = $container->getHookContainer();
		$wikiPageFactory = $container->getWikiPageFactory();
	}

	// The two pages involved in the merge
	$this->source = $source;
	$this->dest = $dest;

	// Database connection used for every query and write of this class
	$this->dbw = $loadBalancer->getConnection( DB_MASTER );

	// Injected services
	$this->permManager = $permManager;
	$this->contentHandlerFactory = $contentHandlerFactory;
	$this->revisionStore = $revisionStore;
	$this->watchedItemStore = $watchedItemStore;
	$this->spamChecker = $spamChecker;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->wikiPageFactory = $wikiPageFactory;

	// The upper bound for merged revisions is the oldest revision of the destination
	$oldestDestTimestamp = $this->dbw->selectField(
		'revision',
		'MIN(rev_timestamp)',
		[ 'rev_page' => $this->dest->getArticleID() ],
		__METHOD__
	);
	$this->maxTimestamp = new MWTimestamp( $oldestDestTimestamp );

	// Work out the merge point and the pivot used in the WHERE condition
	try {
		if ( $timestamp ) {
			// A timestamp was requested: the merge point is the newest source
			// revision that is not later than it, and it is also the pivot.
			$requested = new MWTimestamp( $timestamp );
			$mergePoint = new MWTimestamp( $this->dbw->selectField(
				'revision',
				'MAX(rev_timestamp)',
				[
					'rev_timestamp <= ' .
						$this->dbw->addQuotes( $this->dbw->timestamp( $requested ) ),
					'rev_page' => $this->source->getArticleID()
				],
				__METHOD__
			) );
			$pivot = $mergePoint;
		} else {
			// No timestamp requested: the whole source history goes in front of
			// the destination history. The merge point is the timestamp of the
			// source's latest revision; the pivot is the destination's oldest.
			$mergePoint = new MWTimestamp( $this->dbw->selectField(
				[ 'page', 'revision' ],
				'rev_timestamp',
				[
					'page_id' => $this->source->getArticleID(),
					'page_latest = rev_id'
				],
				__METHOD__
			) );
			$pivot = $this->maxTimestamp;
		}

		$this->timestampLimit = $mergePoint;
		$this->timeWhere = "rev_timestamp <= " .
			$this->dbw->addQuotes( $this->dbw->timestamp( $pivot ) );
	} catch ( TimestampException $ex ) {
		// A timestamp could not be parsed, so the merge cannot continue.
		// isValidMerge() reports this because the limit is false.
		$this->timestampLimit = false;
	}
}
/**
 * Count the source revisions that match the merge condition, i.e. the
 * revisions that will be moved.
 *
 * The query is capped at REVISION_LIMIT + 1 rows, so the result never
 * exceeds that value.
 *
 * @return int
 */
public function getRevisionCount() {
	$conds = [ 'rev_page' => $this->source->getArticleID(), $this->timeWhere ];
	// One row more than the limit is enough to detect "too many"
	$options = [ 'LIMIT' => self::REVISION_LIMIT + 1 ];

	return $this->dbw->selectRowCount( 'revision', '1', $conds, __METHOD__, $options );
}
/**
 * Report how many revisions the last merge() call moved.
 *
 * The value is the affected-row count stored by merge(); it is used for
 * the SpecialMergeHistory success message.
 *
 * @return int
 */
public function getMergedRevisionCount() {
	return $this->revisionsMerged;
}
/**
 * Check whether the user is allowed to perform the merge.
 *
 * Collects, in this order: edit permission errors for both pages, a spam
 * check failure for the reason, and a missing 'mergehistory' right.
 *
 * @param User $user
 * @param string $reason
 * @return Status Status carrying one fatal error per failed check
 */
public function checkPermissions( User $user, $reason ) {
	$result = new Status();

	// The user must be able to edit both pages; every permission error
	// becomes a fatal on the Status object.
	$sourceErrors = $this->permManager->getPermissionErrors( 'edit', $user, $this->source );
	$destErrors = $this->permManager->getPermissionErrors( 'edit', $user, $this->dest );
	foreach ( wfMergeErrorArrays( $sourceErrors, $destErrors ) as $error ) {
		$result->fatal( ...$error );
	}

	// Anti-spam check on the summary
	$spamResult = $this->spamChecker->checkSummary( $reason );
	if ( $spamResult !== false ) {
		// This is kind of lame, won't display nice
		$result->fatal( 'spamprotectiontext' );
	}

	// The dedicated mergehistory right is required as well
	$mayMerge = $this->permManager->userHasRight( $user, 'mergehistory' );
	if ( !$mayMerge ) {
		$result->fatal( 'mergehistory-fail-permission' );
	}

	return $result;
}
/**
 * Run the sanity checks that decide whether the merge is valid.
 * Only things based on the two pages should be checked here.
 *
 * @return Status Status carrying one fatal error per failed check
 */
public function isValidMerge() {
	$result = new Status();

	// Revisions cannot be reliably selected for a page whose article ID is 0
	if ( $this->source->getArticleID() === 0 ) {
		$result->fatal( 'mergehistory-fail-invalid-source' );
	}
	if ( $this->dest->getArticleID() === 0 ) {
		$result->fatal( 'mergehistory-fail-invalid-dest' );
	}

	// A page cannot be merged into itself
	if ( $this->source->equals( $this->dest ) ) {
		$result->fatal( 'mergehistory-fail-self-merge' );
	}

	$limit = $this->timestampLimit;

	// The constructor leaves the limit false when the timestamp was unusable
	if ( !$limit ) {
		$result->fatal( 'mergehistory-fail-bad-timestamp' );
	}

	// The merge point must be older than the oldest destination revision
	if ( $limit > $this->maxTimestamp ) {
		$result->fatal( 'mergehistory-fail-timestamps-overlap' );
	}

	// Refuse to move more revisions than the limit allows; the count is
	// only queried when there is a usable merge point.
	if ( $limit && $this->getRevisionCount() > self::REVISION_LIMIT ) {
		$result->fatal( 'mergehistory-fail-toobig', Message::numParam( self::REVISION_LIMIT ) );
	}

	return $result;
}
/**
 * Actually attempt the history move
 *
 * @todo if all versions of page A are moved to B and then a user
 * tries to do a reverse-merge via the "unmerge" log link, then page
 * A will still be a redirect (as it was after the original merge),
 * though it will have the old revisions back from before (as expected).
 * The user may have to "undo" the redirect manually to finish the "unmerge".
 * Maybe this should delete redirects at the source page of merges?
 *
 * @param User $user
 * @param string $reason
 * @return Status status of the history merge
 */
public function merge( User $user, $reason = '' ) {
	// Validity comes first so that null pages are reported before permissions
	$validity = $this->isValidMerge();
	if ( !$validity->isOK() ) {
		return $validity;
	}
	$permissions = $this->checkPermissions( $user, $reason );
	if ( !$permissions->isOK() ) {
		return $permissions;
	}

	$status = new Status();

	$this->dbw->startAtomic( __METHOD__ );

	// Re-point the selected source revisions at the destination page
	$moveToDest = [ 'rev_page' => $this->dest->getArticleID() ];
	$selectedRevisions = [ 'rev_page' => $this->source->getArticleID(), $this->timeWhere ];
	$this->dbw->update( 'revision', $moveToDest, $selectedRevisions, __METHOD__ );

	// Nothing moved means there is nothing further to do
	$this->revisionsMerged = $this->dbw->affectedRows();
	if ( $this->revisionsMerged < 1 ) {
		$this->dbw->endAtomic( __METHOD__ );
		$status->fatal( 'mergehistory-fail-no-change' );

		return $status;
	}

	// Keep the denormalized revactor_page column in sync
	$actorMoveToDest = [ 'revactor_page' => $this->dest->getArticleID() ];
	$selectedActorRows = [
		'revactor_page' => $this->source->getArticleID(),
		// Slightly hacky, but should work given the values assigned in this class
		str_replace( 'rev_timestamp', 'revactor_timestamp', $this->timeWhere )
	];
	$this->dbw->update( 'revision_actor_temp', $actorMoveToDest, $selectedActorRows, __METHOD__ );

	// Lock whatever revisions remain on the source page; the result tells
	// whether any are left.
	$sourceHasRevisions = $this->dbw->lockForUpdate(
		'revision',
		[ 'rev_page' => $this->source->getArticleID() ],
		__METHOD__
	);

	// Update source page, histories and invalidate caches
	if ( $sourceHasRevisions ) {
		$this->source->invalidateCache();
	} else {
		// The source is now empty: build the edit summary for its
		// replacement revision and let updateSourcePage() handle the page.
		$sourceName = $this->source->getPrefixedText();
		$destName = $this->dest->getPrefixedText();
		$summaryMsg = $reason
			? wfMessage( 'mergehistory-comment', $sourceName, $destName, $reason )
			: wfMessage( 'mergehistory-autocomment', $sourceName, $destName );
		$reason = $summaryMsg->inContentLanguage()->text();

		$this->updateSourcePage( $status, $user, $reason );
	}
	$this->dest->invalidateCache();

	// Duplicate watchers of the old article to the new article
	$this->watchedItemStore->duplicateAllAssociatedEntries( $this->source, $this->dest );

	// Write the log entry for the merge
	$logParams = [
		'4::dest' => $this->dest->getPrefixedText(),
		'5::mergepoint' => $this->timestampLimit->getTimestamp( TS_MW )
	];
	$entry = new ManualLogEntry( 'merge', 'merge' );
	$entry->setPerformer( $user );
	$entry->setComment( $reason );
	$entry->setTarget( $this->source );
	$entry->setParameters( $logParams );
	$entryId = $entry->insert();
	$entry->publish( $entryId );

	$this->hookRunner->onArticleMergeComplete( $this->source, $this->dest );

	$this->dbw->endAtomic( __METHOD__ );

	return $status;
}
/**
 * Do various cleanup work and updates to the source page. This method
 * will only be called if no revision is remaining on the page.
 *
 * At the end, there would be either a redirect page or a deleted page,
 * depending on whether the content model of the page supports redirects or not.
 *
 * @param Status $status Status of the merge; the outcome of a source page
 *   deletion is merged into it
 * @param User $user User recorded as author of the new revision and as
 *   performer of the deletion
 * @param string $reason Comment of the revision created on the source page
 *
 * @return Status The same object as $status
 * @throws InvalidArgumentException If the content handler did not return a
 *   Content object for the source page
 */
private function updateSourcePage( $status, $user, $reason ) {
	$deleteSource = false;
	$sourceModel = $this->source->getContentModel();
	$contentHandler = $this->contentHandlerFactory->getContentHandler( $sourceModel );

	if ( !$contentHandler->supportsRedirects() ) {
		// No redirect can be left behind, so the page gets empty content
		// and is deleted below.
		$deleteSource = true;
		$newContent = $contentHandler->makeEmptyContent();
	} else {
		$msg = wfMessage( 'mergehistory-redirect-text' )->inContentLanguage()->plain();
		$newContent = $contentHandler->makeRedirectContent( $this->dest, $msg );
	}

	if ( !$newContent instanceof Content ) {
		// Handler supports redirect but cannot create new redirect content?
		// Not possible to proceed without Content.

		// @todo. Remove this once there's no evidence it's happening or if it's
		// determined all violating handlers have been fixed.
		// This is mostly kept because previous code was also blindly checking
		// the existence of the Content for both content models that support
		// redirects and those that don't, so it's hard to know what it was masking.
		$logger = MediaWiki\Logger\LoggerFactory::getInstance( 'ContentHandler' );
		$logger->warning(
			'ContentHandler for {model} says it supports redirects but failed '
			. 'to return Content object from ContentHandler::makeRedirectContent().'
			. ' {value} returned instead.',
			[
				'value' => gettype( $newContent ),
				'model' => $sourceModel
			]
		);

		throw new InvalidArgumentException(
			"ContentHandler for '$sourceModel' supports redirects" .
			' but cannot create redirect content during History merge.'
		);
	}

	// T263340/T93469: Create revision record to also serve as the page revision.
	// This revision will be used to create page content. If the source page's
	// content model supports redirects, then it will be the redirect content.
	// If the content model does not support redirects, this content will aid
	// proper deletion of the page below.
	$comment = CommentStoreComment::newUnsavedComment( $reason );
	$revRecord = new MutableRevisionRecord( $this->source );
	$revRecord->setContent( SlotRecord::MAIN, $newContent )
		->setPageId( $this->source->getArticleID() )
		->setComment( $comment )
		->setUser( $user )
		->setTimestamp( wfTimestampNow() );

	$insertedRevRecord = $this->revisionStore->insertRevisionOn( $revRecord, $this->dbw );

	$newPage = $this->wikiPageFactory->newFromTitle( $this->source );
	$newPage->updateRevisionOn( $this->dbw, $insertedRevRecord );

	if ( !$deleteSource ) {
		// We have created a redirect on the source page, so record the link
		// from that page to the destination title. It should have no other
		// outgoing links...
		// The link originates from the SOURCE page: using the destination's
		// page ID here would wipe the destination's own outgoing links and
		// store a self-link instead of the redirect's link.
		$sourceId = $this->source->getArticleID();
		$this->dbw->delete(
			'pagelinks',
			[ 'pl_from' => $sourceId ],
			__METHOD__
		);
		$this->dbw->insert( 'pagelinks',
			[
				'pl_from' => $sourceId,
				'pl_from_namespace' => $this->source->getNamespace(),
				'pl_namespace' => $this->dest->getNamespace(),
				'pl_title' => $this->dest->getDBkey() ],
			__METHOD__
		);

	} else {
		// T263340/T93469: Delete the source page to prevent errors because its
		// revisions are now tied to a different title and its content model
		// does not support redirects, so we cannot leave a new revision on it.
		// This deletion does not depend on user rights but may still fail. If it
		// fails, it will be communicated in the status response.
		$reason = wfMessage( 'mergehistory-source-deleted-reason' )->inContentLanguage()->plain();
		$deletionStatus = $newPage->doDeleteArticleReal( $reason, $user );
		$status->merge( $deletionStatus );
	}

	return $status;
}