Updating composer/semver (1.5.1 => 1.5.2)
[mediawiki.git] / maintenance / namespaceDupes.php
blob25a533ce1241470f5dea1cebedf350b9cc18ca29
1 <?php
2 /**
3 * Check for articles to fix after adding/deleting namespaces
5 * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
23 * @file
24 * @ingroup Maintenance
27 require_once __DIR__ . '/Maintenance.php';
29 use MediaWiki\Linker\LinkTarget;
30 use MediaWiki\MediaWikiServices;
31 use Wikimedia\Rdbms\IDatabase;
32 use Wikimedia\Rdbms\IMaintainableDatabase;
33 use Wikimedia\Rdbms\IResultWrapper;
35 /**
36 * Maintenance script that checks for articles to fix after
37 * adding/deleting namespaces.
39 * @ingroup Maintenance
41 class NamespaceDupes extends Maintenance {
43 /**
44 * @var IMaintainableDatabase
46 protected $db;
48 /**
49 * Total number of pages that need fixing that are automatically resolvable
50 * @var int
52 private $resolvablePages = 0;
54 /**
55 * Total number of pages that need fixing
56 * @var int
58 private $totalPages = 0;
60 /**
61 * Total number of links that need fixing that are automatically resolvable
62 * @var int
64 private $resolvableLinks = 0;
66 /**
67 * Total number of erroneous links
68 * @var int
70 private $totalLinks = 0;
72 /**
73 * Total number of links deleted because they weren't automatically resolvable due to the
74 * target already existing
75 * @var int
77 private $deletedLinks = 0;
/**
 * Register the script description and every supported command-line option.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// Each entry is [ option name, help text, whether the option takes a value ].
	// Options are registered in the order listed here.
	$optionSpecs = [
		[
			'fix',
			'Attempt to automatically fix errors and delete broken links',
			false
		],
		[
			'merge',
			"Instead of renaming conflicts, do a history merge with " .
				"the correct title",
			false
		],
		[
			'add-suffix',
			"Dupes will be renamed with correct namespace with " .
				"<text> appended after the article name",
			true
		],
		[
			'add-prefix',
			"Dupes will be renamed with correct namespace with " .
				"<text> prepended before the article name",
			true
		],
		[
			'source-pseudo-namespace',
			"Move all pages with the given source " .
				"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
				"the colon will be replaced with a hyphen.",
			true
		],
		[
			'dest-namespace',
			"In combination with --source-pseudo-namespace, " .
				"specify the namespace ID of the destination.",
			true
		],
		[
			'move-talk',
			"If this is specified, pages in the Talk namespace that " .
				"begin with a conflicting prefix will be renamed, for example " .
				"Talk:File:Foo -> File_Talk:Foo",
			false
		],
	];

	foreach ( $optionSpecs as [ $optionName, $helpText, $takesValue ] ) {
		// None of the options is required, hence the constant third argument.
		$this->addOption( $optionName, $helpText, false, $takesValue );
	}
}
/**
 * Script entry point: collect the command-line options, run either the
 * single-prefix check or the full check, and print a one-line verdict.
 */
public function execute() {
	$pseudoNamespace = $this->getOption( 'source-pseudo-namespace', '' );

	$options = [
		'fix' => $this->hasOption( 'fix' ),
		'merge' => $this->hasOption( 'merge' ),
		'add-suffix' => $this->getOption( 'add-suffix', '' ),
		'add-prefix' => $this->getOption( 'add-prefix', '' ),
		'move-talk' => $this->hasOption( 'move-talk' ),
		'source-pseudo-namespace' => $pseudoNamespace,
		'dest-namespace' => (int)$this->getOption( 'dest-namespace', 0 ),
	];

	// An explicit source prefix restricts the run to that one prefix;
	// otherwise every known namespace name is checked.
	$allGood = $pseudoNamespace !== ''
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $allGood ? "\nLooks good!\n" : "\nOh noeees\n" );
}
/**
 * Check all namespaces: every interwiki prefix, canonical name, localised
 * name and alias (plus their case variants) is checked for conflicting
 * pages, and every non-zero namespace is checked in the link tables.
 *
 * @param array $options Associative array of validated command-line options
 *
 * @return bool False if checkNamespace() reported a failure for any name
 */
private function checkAll( $options ) {
	$services = MediaWikiServices::getInstance();
	$contLang = $services->getContentLanguage();

	// Map of prefix text => namespace ID.
	$nsByName = [];

	// List interwikis first, so they'll be overridden
	// by any conflicting local namespaces.
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$nsByName[ $contLang->ucfirst( $iwPrefix ) ] = 0;
	}

	// Now pull in all canonical and alias namespaces...
	// The canonical list includes $wgExtraNamespaces.
	foreach ( $services->getNamespaceInfo()->getCanonicalNamespaces() as $nsId => $nsName ) {
		if ( $nsName === '' ) {
			continue;
		}
		$nsByName[$nsName] = $nsId;
	}
	foreach ( $contLang->getNamespaces() as $nsId => $nsName ) {
		if ( $nsName === '' ) {
			continue;
		}
		$nsByName[$nsName] = $nsId;
	}
	foreach ( $contLang->getNamespaceAliases() as $aliasName => $nsId ) {
		$nsByName[$aliasName] = $nsId;
	}

	// We'll need to check for lowercase keys as well,
	// since we're doing case-sensitive searches in the db.
	$capitalLinks = $this->getConfig()->get( 'CapitalLinks' );
	foreach ( $nsByName as $nsName => $nsId ) {
		$lowered = $contLang->lc( $nsName );
		$variants = [
			$contLang->uc( $nsName ),
			$contLang->ucfirst( $lowered ),
			$contLang->ucwords( $nsName ),
			$contLang->ucwords( $lowered ),
			$contLang->ucwordbreaks( $nsName ),
			$contLang->ucwordbreaks( $lowered ),
		];
		if ( !$capitalLinks ) {
			// Add a lower-case-first form of each variant, then of the name itself.
			$variants = array_merge(
				$variants,
				array_map( [ $contLang, 'lcfirst' ], $variants ),
				[ $contLang->lcfirst( $nsName ) ]
			);
		}
		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant === $nsName ) {
				continue;
			}
			$nsByName[$variant] = $nsId;
		}
	}

	// Sort by namespace index, and if there are two with the same index,
	// break the tie by sorting by name
	$unsorted = $nsByName;
	uksort( $nsByName, static function ( $a, $b ) use ( $unsorted ) {
		$byIndex = $unsorted[$a] <=> $unsorted[$b];
		if ( $byIndex ) {
			return $byIndex;
		}
		return $a <=> $b;
	} );

	$ok = true;
	foreach ( $nsByName as $nsName => $nsId ) {
		// Every name is checked even after a failure has been seen.
		if ( !$this->checkNamespace( $nsId, $nsName, $options ) ) {
			$ok = false;
		}
	}

	$this->output(
		"{$this->totalPages} pages to fix, " .
		"{$this->resolvablePages} were resolvable.\n\n"
	);

	foreach ( $nsByName as $nsName => $nsId ) {
		if ( $nsId == 0 ) {
			/* Link destinations are fixed up for non-interwiki links only.
			 *
			 * For example if a page has [[Foo:Bar]] and then a Foo namespace
			 * is introduced, pagelinks needs to be updated to have
			 * page_namespace = NS_FOO.
			 *
			 * If instead an interwiki prefix was introduced called "Foo",
			 * the link should instead be moved to the iwlinks table. If a new
			 * language is introduced called "Foo", or if there is a pagelink
			 * [[fr:Bar]] when interlanguage magic links are turned on, the
			 * link would have to be moved to the langlinks table. Let's put
			 * those cases in the too-hard basket for now. The consequences are
			 * not especially severe.
			 * @fixme Handle interwiki links, and pagelinks to Category:, File:
			 * which probably need reparsing.
			 */
			continue;
		}

		$this->checkLinkTable( 'pagelinks', 'pl', $nsId, $nsName, $options );
		$this->checkLinkTable( 'templatelinks', 'tl', $nsId, $nsName, $options );

		// The redirect table has interwiki links randomly mixed in, we
		// need to filter those out. For example [[w:Foo:Bar]] would
		// have rd_interwiki=w and rd_namespace=0, which would match the
		// query for a conflicting namespace "Foo" if filtering wasn't done.
		$this->checkLinkTable( 'redirect', 'rd', $nsId, $nsName, $options,
			[ 'rd_interwiki' => null ] );
		$this->checkLinkTable( 'redirect', 'rd', $nsId, $nsName, $options,
			[ 'rd_interwiki' => '' ] );
	}

	$this->output(
		"{$this->totalLinks} links to fix, " .
		"{$this->resolvableLinks} were resolvable, " .
		"{$this->deletedLinks} were deleted.\n"
	);

	return $ok;
}
/**
 * Collect the iw_prefix value of every row returned by the interwiki lookup.
 *
 * @return string[]
 */
private function getInterwikiList() {
	$interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
	$prefixRows = $interwikiLookup->getAllPrefixes();

	return array_column( $prefixRows, 'iw_prefix' );
}
/**
 * Check a given prefix and try to move it into the given destination namespace
 *
 * For each conflicting page an action (abort, move or merge) is chosen and
 * logged. The action is only carried out when the 'fix' option is set.
 *
 * @param int $ns Destination namespace id
 * @param string $name
 * @param array $options Associative array of validated command-line options
 * @return bool False if any page was aborted or could not be fixed
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$count = $targets->numRows();
	$this->totalPages += $count;
	if ( $count == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
	$noAlternateRequested = $options['add-prefix'] == '' && $options['add-suffix'] == '';

	$ok = true;
	foreach ( $targets as $row ) {
		// Find the new title and determine the action to take.
		// 'abort' is the default; the branches below upgrade it where possible.
		$action = 'abort';
		$logStatus = false;
		$newTitle = $this->getDestinationTitle(
			$ns, $name, $row->page_namespace, $row->page_title );

		if ( !$newTitle ) {
			$logStatus = 'invalid title';
		} elseif ( !$newTitle->exists() ) {
			$action = 'move';
			$logStatus = 'no conflict';
		} elseif ( $options['merge'] ) {
			// canMerge() fills in $logStatus itself when it refuses.
			if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
				$action = 'merge';
			}
		} elseif ( $noAlternateRequested ) {
			$logStatus = 'dest title exists and --add-prefix not specified';
		} else {
			$newTitle = $this->getAlternateTitle( $newTitle, $options );
			if ( !$newTitle ) {
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$logStatus = 'title conflict';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Take the action or log a dry run message
		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		if ( $action === 'abort' ) {
			$this->output( "$logTitle *** $logStatus\n" );
			$pageOK = false;
		} elseif ( $action === 'move' ) {
			$this->output( "$logTitle -> " .
				$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

			if ( $options['fix'] ) {
				$pageOK = $this->movePage( $row->page_id, $newTitle );
			}
		} elseif ( $action === 'merge' ) {
			$this->output( "$logTitle => " .
				$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

			if ( $options['fix'] ) {
				$pageOK = $this->mergePage( $row, $newTitle );
			}
		}

		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$ok = false;
		}
	}

	return $ok;
}
/**
 * Check and repair the destination fields in a link table
 *
 * Rows are read in batches ordered by (title, from). With the 'fix' option
 * each row is re-pointed at its destination title; rows that still exist
 * afterwards (the update uses IGNORE) are deleted.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix in the link table
 * @param int $ns Destination namespace id
 * @param string $name
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the SQL query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$dbw = $this->getDB( DB_MASTER );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	$fromField = "{$fieldPrefix}_from";
	$namespaceField = "{$fieldPrefix}_namespace";
	$titleField = "{$fieldPrefix}_title";
	$batchSize = 500;

	// These parts of the query are the same for every batch.
	$selectFields = [ $fromField, $namespaceField, $titleField ];
	$matchConds = [
		$namespaceField => 0,
		$titleField . $dbw->buildLike( "$name:", $dbw->anyString() )
	];
	$selectOptions = [
		'ORDER BY' => [ $titleField, $fromField ],
		'LIMIT' => $batchSize
	];

	// Paging condition; empty for the first batch.
	$batchConds = [];

	while ( true ) {
		$res = $dbw->select(
			$table,
			$selectFields,
			array_merge( $batchConds, $extraConds, $matchConds ),
			__METHOD__,
			$selectOptions
		);

		if ( $res->numRows() == 0 ) {
			break;
		}

		$rowsToDeleteIfStillExists = [];
		$lastRow = null;

		foreach ( $res as $row ) {
			$lastRow = $row;
			$this->totalLinks++;

			$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
				"dbk={$row->$titleField}";
			$destTitle = $this->getDestinationTitle(
				$ns, $name, $row->$namespaceField, $row->$titleField );

			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}

			$this->resolvableLinks++;
			$destText = $destTitle->getPrefixedDBkey();

			if ( !$options['fix'] ) {
				$this->output( "$table $logTitle -> " . $destText . " DRY RUN\n" );
				continue;
			}

			$dbw->update( $table,
				// SET
				[
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey()
				],
				// WHERE
				[
					$namespaceField => 0,
					$titleField => $row->$titleField,
					$fromField => $row->$fromField
				],
				__METHOD__,
				[ 'IGNORE' ]
			);

			// Remember the original row so it can be removed below if the
			// update above left it in place.
			$rowsToDeleteIfStillExists[] = $dbw->makeList(
				[
					$fromField => $row->$fromField,
					$namespaceField => $row->$namespaceField,
					$titleField => $row->$titleField,
				],
				IDatabase::LIST_AND
			);

			$this->output( "$table $logTitle -> " . $destText . "\n" );
		}

		if ( $options['fix'] && count( $rowsToDeleteIfStillExists ) > 0 ) {
			$dbw->delete(
				$table,
				$dbw->makeList( $rowsToDeleteIfStillExists, IDatabase::LIST_OR ),
				__METHOD__
			);

			// Deleted rows were counted as resolvable above; move them to
			// the deleted tally instead.
			$deleted = $dbw->affectedRows();
			$this->deletedLinks += $deleted;
			$this->resolvableLinks -= $deleted;
		}

		// Continue after the last row of this batch in (title, from) order.
		$encLastTitle = $dbw->addQuotes( $lastRow->$titleField );
		$encLastFrom = $dbw->addQuotes( $lastRow->$fromField );

		$batchConds = [
			"$titleField > $encLastTitle " .
			"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)"
		];

		$lbFactory->waitForReplication();
	}
}
/**
 * Move the given pseudo-namespace, either replacing the colon with a hyphen
 * (useful for pseudo-namespaces that conflict with interwiki links) or move
 * them to another namespace if specified.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool Result of checkNamespace() for the requested prefix
 */
private function checkPrefix( $options ) {
	[ 'source-pseudo-namespace' => $prefix, 'dest-namespace' => $ns ] = $options;

	$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );

	return $this->checkNamespace( $ns, $prefix, $options );
}
/**
 * Find pages in main and talk namespaces that have a prefix of the new
 * namespace so we know titles that will need migrating
 *
 * The talk namespace is only searched when 'move-talk' is set and $ns is a
 * subject namespace.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 *
 * @return IResultWrapper
 */
private function getTargetList( $ns, $name, $options ) {
	$dbw = $this->getDB( DB_MASTER );

	$includeTalk = $options['move-talk']
		&& MediaWikiServices::getInstance()->getNamespaceInfo()->isSubject( $ns );
	$checkNamespaces = $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

	$fields = [
		'page_id',
		'page_title',
		'page_namespace',
	];
	$conds = [
		'page_namespace' => $checkNamespaces,
		'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
	];

	return $dbw->select( 'page', $fields, $conds, __METHOD__ );
}
/**
 * Get the preferred destination title for a given target page.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @return Title|false False if no title that can exist could be built
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
	// Strip the leading "<name>:" from the source key.
	$prefixLength = strlen( "$name:" );
	$dbk = substr( $sourceDbk, $prefixLength );
	if ( $ns == 0 ) {
		// An interwiki; try an alternate encoding with '-' for ':'
		$dbk = "$name-" . $dbk;
	}

	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	// A page found in the Talk namespace is an associated talk page moved
	// with the --move-talk feature, so it goes to the destination's talk namespace.
	$isMovedTalkPage = $sourceNs == NS_TALK && $nsInfo->isSubject( $ns );
	$destNS = $isMovedTalkPage ? $nsInfo->getTalk( $ns ) : $ns;

	$newTitle = Title::makeTitleSafe( $destNS, $dbk );

	return ( $newTitle && $newTitle->canExist() ) ? $newTitle : false;
}
/**
 * Get an alternative title to move a page to. This is used if the
 * preferred destination title already exists.
 *
 * The --add-prefix / --add-suffix text is applied repeatedly, each time to
 * the DB key produced by the previous attempt, until a title is found that
 * does not exist yet or no further valid title can be built.
 *
 * @param LinkTarget $linkTarget The preferred destination, which is already taken
 * @param array $options Associative array of validated command-line options
 * @return Title|bool A title that does not exist yet, or false if none was found
 */
private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];
	if ( $prefix == '' && $suffix == '' ) {
		return false;
	}

	$namespace = $linkTarget->getNamespace();
	$currentDbk = $linkTarget->getDBkey();
	while ( true ) {
		// Build on the previous attempt. Rebuilding from the unchanged
		// $linkTarget would produce the same key on every pass and loop
		// forever whenever that key is already taken.
		$title = Title::makeTitleSafe( $namespace, $prefix . $currentDbk . $suffix );
		if ( !$title ) {
			// Presumably reached once the key grows past what
			// makeTitleSafe() accepts -- TODO confirm the exact limit.
			return false;
		}
		if ( !$title->exists() ) {
			return $title;
		}

		$nextDbk = $title->getDBkey();
		if ( $nextDbk === $currentDbk ) {
			// The prefix/suffix did not change the resulting key, so
			// further attempts cannot make progress.
			return false;
		}
		$currentDbk = $nextDbk;
	}
}
/**
 * Move a page by rewriting its page row, then bring the *_from_namespace
 * columns of the link tables in line with the new namespace.
 *
 * @param int $id The page_id
 * @param LinkTarget $newLinkTarget The new title link target
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$dbw = $this->getDB( DB_MASTER );

	$newLocation = [
		"page_namespace" => $newLinkTarget->getNamespace(),
		"page_title" => $newLinkTarget->getDBkey(),
	];
	$dbw->update( 'page', $newLocation, [ "page_id" => $id ], __METHOD__ );

	// Update *_from_namespace in links tables (table name => field prefix)
	$prefixByTable = [
		'pagelinks' => 'pl',
		'templatelinks' => 'tl',
		'imagelinks' => 'il',
	];
	foreach ( $prefixByTable as $table => $fieldPrefix ) {
		$dbw->update( $table,
			// SET
			[ "{$fieldPrefix}_from_namespace" => $newLinkTarget->getNamespace() ],
			// WHERE
			[ "{$fieldPrefix}_from" => $id ],
			__METHOD__
		);
	}

	return true;
}
/**
 * Determine if we can merge a page.
 * We check if an inaccessible revision would become the latest and
 * deny the merge if so -- it's theoretically possible to update the
 * latest revision, but opens a can of worms -- search engine updates,
 * recentchanges review, etc.
 *
 * The merge is also denied when either page has no latest revision to
 * compare, rather than failing on a null revision object.
 *
 * @param int $id The page_id
 * @param LinkTarget $linkTarget The new link target
 * @param string &$logStatus This is set to the log status message on failure
 * @return bool
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
	$revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();
	$latestDest = $revisionLookup->getRevisionByTitle( $linkTarget, 0,
		IDBAccessObject::READ_LATEST );
	$latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
		IDBAccessObject::READ_LATEST );

	// Both lookups can come back empty (e.g. a page row without revisions);
	// there is nothing to compare in that case, so refuse the merge.
	if ( !$latestSource ) {
		$logStatus = 'cannot merge since source has no revisions';
		return false;
	}
	if ( !$latestDest ) {
		$logStatus = 'cannot merge since destination has no revisions';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
/**
 * Merge page histories: re-point the source page's revisions at the
 * destination page, delete the source page row, then run the links
 * deletion update for the old title.
 *
 * @param stdClass $row Page row
 * @param Title $newTitle The new title
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$dbw = $this->getDB( DB_MASTER );

	$sourceId = $row->page_id;
	$destId = $newTitle->getArticleID();

	// Construct the WikiPage object we will need later, while the
	// page_id still exists. Note that this cannot use makeTitleSafe(),
	// we are deliberately constructing an invalid title.
	$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$sourceTitle->resetArticleID( $sourceId );
	$sourcePage = new WikiPage( $sourceTitle );
	$sourcePage->loadPageData( 'fromdbmaster' );

	// Moving the revisions and removing the page row happen in one transaction.
	$this->beginTransaction( $dbw, __METHOD__ );
	$dbw->update( 'revision',
		// SET
		[ 'rev_page' => $destId ],
		// WHERE
		[ 'rev_page' => $sourceId ],
		__METHOD__
	);
	$dbw->delete( 'page', [ 'page_id' => $sourceId ], __METHOD__ );
	$this->commitTransaction( $dbw, __METHOD__ );

	/* Call LinksDeletionUpdate to delete outgoing links from the old title,
	 * and update category counts.
	 *
	 * Calling external code with a fake broken Title is a fairly dubious
	 * idea. It's necessary because it's quite a lot of code to duplicate,
	 * but that also makes it fragile since it would be easy for someone to
	 * accidentally introduce an assumption of title validity to the code we
	 * are calling.
	 */
	$linksCleanup = new LinksDeletionUpdate( $sourcePage );
	DeferredUpdates::addUpdate( $linksCleanup );
	DeferredUpdates::doUpdates();

	return true;
}
// Name the class to run, then include the file named by RUN_MAINTENANCE_IF_MAIN.
$maintClass = NamespaceDupes::class;
require_once RUN_MAINTENANCE_IF_MAIN;