Merge "API: Properly handle limit and continuation for generator=prefixsearch"
[mediawiki.git] / maintenance / namespaceDupes.php
blob96e01fe43c277eea7a0c49ac823d9ec40c9b4cfc
1 <?php
/**
 * Check for articles to fix after adding/deleting namespaces
 *
 * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */
27 require_once __DIR__ . '/Maintenance.php';
/**
 * Maintenance script that checks for articles to fix after
 * adding/deleting namespaces.
 *
 * @ingroup Maintenance
 */
35 class NamespaceConflictChecker extends Maintenance {
/**
 * @var DatabaseBase Database handle used for every query; assigned in execute()
 */
protected $db;

/** @var int Number of conflicting pages whose handling did not abort or fail */
private $resolvableCount = 0;

/** @var int Running total of conflicting pages found by checkNamespace() */
private $totalPages = 0;
/**
 * Set the script description and register its command-line options.
 */
public function __construct() {
	parent::__construct();
	// This used to be an empty string, which left the help output without
	// any summary of what the script does.
	$this->mDescription = 'Find and fix pages affected by namespace addition/removal';
	$this->addOption( 'fix', 'Attempt to automatically fix errors' );
	$this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
		"the correct title" );
	$this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
		"<text> appended after the article name", false, true );
	$this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
		"<text> prepended before the article name", false, true );
	$this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
		"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
		"the colon will be replaced with a hyphen.",
		false, true );
	$this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
		"specify the namespace ID of the destination.", false, true );
	$this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
		"begin with a conflicting prefix will be renamed, for example " .
		"Talk:File:Foo -> File_Talk:Foo" );
}
/**
 * Script entry point: gather the command-line options, run either the
 * single pseudo-namespace check or the scan of all namespaces, and print
 * a one-line verdict.
 */
public function execute() {
	$this->db = wfGetDB( DB_MASTER );

	$sourcePrefix = $this->getOption( 'source-pseudo-namespace', '' );
	$options = array(
		'fix' => $this->hasOption( 'fix' ),
		'merge' => $this->hasOption( 'merge' ),
		'add-suffix' => $this->getOption( 'add-suffix', '' ),
		'add-prefix' => $this->getOption( 'add-prefix', '' ),
		'move-talk' => $this->hasOption( 'move-talk' ),
		'source-pseudo-namespace' => $sourcePrefix,
		'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ),
	);

	// A source prefix restricts the run to that one pseudo-namespace.
	$retval = ( $sourcePrefix !== '' )
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $retval ? "\nLooks good!\n" : "\nOh noeees\n" );
}
/**
 * Check all namespaces
 *
 * Builds a map of every interwiki prefix, namespace name and namespace
 * alias (plus their case variants) to a namespace index, then runs
 * checkNamespace() on each entry and prints the totals.
 *
 * @param array $options Associative array of validated command-line options
 *
 * @return bool False if checkNamespace() reported a problem for any prefix
 */
private function checkAll( $options ) {
	global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks;

	// prefix => namespace index. Later assignments overwrite earlier ones,
	// so interwikis are listed first and lose to conflicting local namespaces.
	$spaces = array();
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$spaces[$wgContLang->ucfirst( $iwPrefix )] = 0;
	}

	// Now pull in all canonical and alias namespaces...
	// (the canonical list includes $wgExtraNamespaces)
	foreach ( MWNamespace::getCanonicalNamespaces() as $index => $canonical ) {
		if ( $canonical === '' ) {
			continue;
		}
		$spaces[$canonical] = $index;
	}
	foreach ( $wgContLang->getNamespaces() as $index => $localName ) {
		if ( $localName === '' ) {
			continue;
		}
		$spaces[$localName] = $index;
	}
	foreach ( $wgNamespaceAliases as $alias => $index ) {
		$spaces[$alias] = $index;
	}
	foreach ( $wgContLang->getNamespaceAliases() as $alias => $index ) {
		$spaces[$alias] = $index;
	}

	// We'll need to check for lowercase keys as well,
	// since we're doing case-sensitive searches in the db.
	foreach ( $spaces as $name => $ns ) {
		$lowerName = $wgContLang->lc( $name );
		$variants = array(
			$wgContLang->uc( $name ),
			$wgContLang->ucfirst( $lowerName ),
			$wgContLang->ucwords( $name ),
			$wgContLang->ucwords( $lowerName ),
			$wgContLang->ucwordbreaks( $name ),
			$wgContLang->ucwordbreaks( $lowerName ),
		);
		if ( !$wgCapitalLinks ) {
			// Titles may start with a lowercase letter, so add the
			// lcfirst form of every variant and of the name itself.
			$lowered = array_map( array( $wgContLang, 'lcfirst' ), $variants );
			$variants = array_merge( $variants, $lowered );
			$variants[] = $wgContLang->lcfirst( $name );
		}
		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant !== $name ) {
				$spaces[$variant] = $ns;
			}
		}
	}

	// Sort by namespace index, and if there are two with the same index,
	// break the tie by sorting by name
	$origSpaces = $spaces;
	uksort( $spaces, function ( $a, $b ) use ( $origSpaces ) {
		$nsA = $origSpaces[$a];
		$nsB = $origSpaces[$b];
		if ( $nsA < $nsB ) {
			return -1;
		}
		if ( $nsA > $nsB ) {
			return 1;
		}
		if ( $a < $b ) {
			return -1;
		}
		return ( $a > $b ) ? 1 : 0;
	} );

	// Every prefix is checked even after a failure; the result only
	// records whether any of them failed.
	$ok = true;
	foreach ( $spaces as $name => $ns ) {
		if ( !$this->checkNamespace( $ns, $name, $options ) ) {
			$ok = false;
		}
	}

	$this->output( "{$this->totalPages} pages to fix, " .
		"{$this->resolvableCount} were resolvable.\n" );

	return $ok;
}
/**
 * Get the interwiki list
 *
 * @return array List of the iw_prefix value of every row returned by
 *   Interwiki::getAllPrefixes()
 */
private function getInterwikiList() {
	$pickPrefix = function ( $row ) {
		return $row['iw_prefix'];
	};

	// array_values() re-indexes the result as a plain 0..n-1 list.
	return array_values( array_map( $pickPrefix, Interwiki::getAllPrefixes() ) );
}
/**
 * Check a given prefix and try to move it into the given destination namespace
 *
 * For every page returned by getTargetList() an action (abort, move or
 * merge) is chosen and logged; the action is only carried out when the
 * 'fix' option is set.
 *
 * @param int $ns Destination namespace id
 * @param string $name
 * @param array $options Associative array of validated command-line options
 * @return bool False if any page was aborted or its move/merge failed
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$found = $targets->numRows();
	$this->totalPages += $found;
	if ( $found == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
	$allOK = true;

	foreach ( $targets as $row ) {
		// Find the new title and determine the action to take.
		// 'abort' is the default; branches below only override it on success.
		$action = 'abort';
		$logStatus = false;
		$newTitle = $this->getDestinationTitle( $ns, $name, $row, $options );

		if ( !$newTitle ) {
			$logStatus = 'invalid title';
		} elseif ( !$newTitle->exists() ) {
			$action = 'move';
			$logStatus = 'no conflict';
		} elseif ( $options['merge'] ) {
			// canMerge() fills in $logStatus when it refuses
			if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
				$action = 'merge';
			}
		} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
			$logStatus = 'dest title exists and --add-prefix not specified';
		} else {
			$newTitle = $this->getAlternateTitle( $newTitle, $options );
			if ( !$newTitle ) {
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$logStatus = 'title conflict';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Take the action or log a dry run message
		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		if ( $action === 'abort' ) {
			$this->output( "$logTitle *** $logStatus\n" );
			$pageOK = false;
		} elseif ( $action === 'move' ) {
			$this->output( "$logTitle -> " .
				$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

			if ( $options['fix'] ) {
				$pageOK = $this->movePage( $row->page_id, $newTitle );
			}
		} elseif ( $action === 'merge' ) {
			$this->output( "$logTitle => " .
				$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

			if ( $options['fix'] ) {
				$pageOK = $this->mergePage( $row->page_id, $newTitle );
			}
		}

		if ( $pageOK ) {
			$this->resolvableCount++;
		} else {
			$allOK = false;
		}
	}

	// @fixme Also needs to do like self::getTargetList() on the
	// *_namespace and *_title fields of pagelinks, templatelinks, and
	// redirects, and schedule a LinksUpdate job or similar for each found
	// *_from.

	return $allOK;
}
/**
 * Move the given pseudo-namespace, either replacing the colon with a hyphen
 * (useful for pseudo-namespaces that conflict with interwiki links) or move
 * them to another namespace if specified.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool Result of checkNamespace() for the requested prefix
 */
private function checkPrefix( $options ) {
	$ns = $options['dest-namespace'];
	$prefix = $options['source-pseudo-namespace'];

	$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );
	$result = $this->checkNamespace( $ns, $prefix, $options );

	return $result;
}
/**
 * Find pages in main and talk namespaces that have a prefix of the new
 * namespace so we know titles that will need migrating
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 *
 * @return ResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	// Talk pages are only searched with --move-talk, and only when the
	// destination is a subject namespace.
	$includeTalk = $options['move-talk'] && MWNamespace::isSubject( $ns );
	$checkNamespaces = $includeTalk ? array( NS_MAIN, NS_TALK ) : NS_MAIN;

	$fields = array( 'page_id', 'page_title', 'page_namespace' );
	$titleLike = 'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() );
	$conds = array(
		'page_namespace' => $checkNamespaces,
		$titleLike,
	);

	return $this->db->select( 'page', $fields, $conds, __METHOD__ );
}
/**
 * Get the preferred destination title for a given target page row.
 *
 * @param integer $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param stdClass $row Row carrying page_title and page_namespace
 * @param array $options Associative array of validated command-line options
 * @return Title|false False if no valid title that can exist could be built
 */
private function getDestinationTitle( $ns, $name, $row, $options ) {
	// Strip the "<prefix>:" part from the stored title.
	$rest = substr( $row->page_title, strlen( "$name:" ) );
	// An interwiki; try an alternate encoding with '-' for ':'
	$dbk = ( $ns == 0 ) ? "$name-" . $rest : $rest;

	// This is an associated talk page moved with the --move-talk feature.
	$isMovedTalk = $row->page_namespace == NS_TALK && MWNamespace::isSubject( $ns );
	$destNS = $isMovedTalk ? MWNamespace::getTalk( $ns ) : $ns;

	$newTitle = Title::makeTitleSafe( $destNS, $dbk );

	return ( $newTitle && $newTitle->canExist() ) ? $newTitle : false;
}
/**
 * Get an alternative title to move a page to. This is used if the
 * preferred destination title already exists.
 *
 * The configured prefix and suffix are applied repeatedly until the
 * resulting title is either invalid or does not exist yet.
 *
 * @param Title $title
 * @param array $options Associative array of validated command-line options
 * @return Title|bool False if neither a prefix nor a suffix is configured,
 *   or if an invalid title is produced
 */
private function getAlternateTitle( $title, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];
	if ( $prefix == '' && $suffix == '' ) {
		return false;
	}

	do {
		$candidateKey = $prefix . $title->getDBkey() . $suffix;
		$title = Title::makeTitleSafe( $title->getNamespace(), $candidateKey );
	} while ( $title && $title->exists() );

	return $title ? $title : false;
}
/**
 * Move a page
 *
 * Only the page row itself is rewritten to point at the new namespace
 * and title.
 *
 * @fixme Update pl_from_namespace etc.
 *
 * @param integer $id The page_id
 * @param Title $newTitle The new title
 * @return bool Always true
 */
private function movePage( $id, Title $newTitle ) {
	$newLocation = array(
		'page_namespace' => $newTitle->getNamespace(),
		'page_title' => $newTitle->getDBkey(),
	);
	$where = array( 'page_id' => $id );

	$this->db->update( 'page', $newLocation, $where, __METHOD__ );

	// @fixme Needs updating the *_from_namespace fields in categorylinks,
	// pagelinks, templatelinks and imagelinks.

	return true;
}
/**
 * Determine if we can merge a page.
 * We check if an inaccessible revision would become the latest and
 * deny the merge if so -- it's theoretically possible to update the
 * latest revision, but opens a can of worms -- search engine updates,
 * recentchanges review, etc.
 *
 * @param integer $id The page_id
 * @param Title $newTitle The new title
 * @param string $logStatus This is set to the log status message on failure
 * @return bool
 */
private function canMerge( $id, Title $newTitle, &$logStatus ) {
	$latestDest = Revision::newFromTitle( $newTitle, 0, Revision::READ_LATEST );
	$latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );

	// Either lookup can come back empty (e.g. a page row without a usable
	// latest revision); refuse the merge rather than fail on a null object.
	if ( !$latestDest || !$latestSource ) {
		$logStatus = 'cannot merge since a latest revision is missing';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
/**
 * Merge page histories
 *
 * Inside one transaction, all revisions of the source page are pointed
 * at the destination page and the source page row is deleted.
 *
 * @param integer $id The page_id
 * @param Title $newTitle The new title
 * @return bool Always true
 */
private function mergePage( $id, Title $newTitle ) {
	$destId = $newTitle->getArticleId();
	$sourceRevs = array( 'rev_page' => $id );
	$destRevs = array( 'rev_page' => $destId );

	$this->db->begin( __METHOD__ );

	// Reassign the revisions, then drop the now-empty source page row.
	$this->db->update( 'revision', $destRevs, $sourceRevs, __METHOD__ );
	$this->db->delete( 'page', array( 'page_id' => $id ), __METHOD__ );

	// @fixme Need WikiPage::doDeleteUpdates() or similar to avoid orphan
	// rows in the links tables.

	$this->db->commit( __METHOD__ );

	return true;
}
// Name the maintenance class first, then include the runner file,
// which must come after $maintClass is set.
$maintClass = 'NamespaceConflictChecker';
require_once RUN_MAINTENANCE_IF_MAIN;