<?php
/**
 * Obligatory redundant license notice. Exception to the GPL's "keep intact all
 * the notices" clause with respect to this notice is hereby granted.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

25 use MediaWiki\Maintenance\Maintenance
;
26 use MediaWiki\Title\Title
;
27 use MediaWiki\User\User
;
28 use MediaWiki\WikiMap\WikiMap
;
29 use Wikimedia\Rdbms\IDatabase
;
30 use Wikimedia\Rdbms\IExpression
;
31 use Wikimedia\Rdbms\IReadableDatabase
;
32 use Wikimedia\Rdbms\LikeValue
;
34 // @codeCoverageIgnoreStart
35 require_once __DIR__
. '/Maintenance.php';
36 // @codeCoverageIgnoreEnd
/**
 * Maintenance script to rename titles affected by changes to Unicode (or
 * otherwise to Language::ucfirst).
 *
 * The script is driven by a character map (old first character => new first
 * character). Every supported table is scanned for titles starting with one of
 * the mapped characters and the title is rewritten, either through MovePage
 * or by updating the row directly. Without --run nothing is written; the
 * script only reports what it would do.
 *
 * @ingroup Maintenance
 */
class UppercaseTitlesForUnicodeTransition extends Maintenance {

	/** Rename the page through MovePage */
	private const MOVE = 0;
	/** Rename by updating the table row directly, munging the target on conflict */
	private const INPLACE_MOVE = 1;
	/** Rewrite the row to point at the uppercased title, with no conflict handling */
	private const UPPERCASE = 2;

	/** @var bool Whether changes are really performed (--run); false means dry-run */
	private $run = false;

	/** @var array Map of original first character => replacement first character */
	private $charmap = [];

	/** @var User|null System user performing the moves; only set when --run is given */
	private $user;

	/** @var string Reason used for moves (and as the prefix of deletion reasons) */
	private $reason = 'Uppercasing title for Unicode upgrade';

	/** @var string[] Change tags applied to moves */
	private $tags = [];

	/** @var bool[] Cache for isUserPage(): base user page title => whether the user exists */
	private $seenUsers = [];

	/** @var array|null Lazily computed result of getNamespaces() */
	private $namespaces = null;

	private ?string $prefix = null;
	private ?string $suffix = null;

	/** @var int|null Namespace parsed from --prefix */
	private $prefixNs = null;

	/** @var string[]|null Tables to process (--tables), or null for all */
	private $tables = null;

	public function __construct() {
		parent::__construct();
		$this->addDescription(
			"Rename titles when changing behavior of Language::ucfirst().\n"
			. "\n"
			. "This script skips User and User_talk pages for registered users, as renaming of users "
			. "is too complex to try to implement here. Use something like Extension:Renameuser to "
			. "clean those up; this script can provide a list of user names affected."
		);
		$this->addOption(
			'charmap', 'Character map generated by maintenance/language/generateUcfirstOverrides.php',
			true, true
		);
		$this->addOption(
			'user', 'System user to use to do the renames. Default is "Maintenance script".', false, true
		);
		$this->addOption(
			'steal',
			'If the username specified by --user exists, specify this to force conversion to a system user.'
		);
		$this->addOption(
			'run', 'If not specified, the script will not actually perform any moves (i.e. it will dry-run).'
		);
		$this->addOption(
			'prefix', 'When the new title already exists, add this prefix.', false, true
		);
		$this->addOption(
			'suffix', 'When the new title already exists, add this suffix.', false, true
		);
		$this->addOption( 'reason', 'Reason to use when moving pages.', false, true );
		$this->addOption( 'tag', 'Change tag to apply when moving pages.', false, true );
		$this->addOption( 'tables', 'Comma-separated list of database tables to process.', false, true );
		$this->addOption(
			'userlist', 'Filename to which to output usernames needing rename. ' .
			'This file can then be used directly by renameInvalidUsernames.php maintenance script',
			false, true
		);
		$this->setBatchSize( 1000 );
	}

	/**
	 * Validate the options, load the charmap and process every supported table.
	 */
	public function execute() {
		$this->run = $this->getOption( 'run', false );

		// A system user is only needed when moves are really performed.
		if ( $this->run ) {
			$username = $this->getOption( 'user', User::MAINTENANCE_SCRIPT_USER );
			$steal = $this->getOption( 'steal', false );
			$this->user = User::newSystemUser( $username, [ 'steal' => $steal ] );
			if ( !$this->user ) {
				$user = User::newFromName( $username );
				if ( !$steal && $user && $user->isRegistered() ) {
					$this->fatalError( "User $username already exists.\n"
						. "Use --steal if you really want to steal it from the human who currently owns it."
					);
				}
				$this->fatalError( "Could not obtain system user $username." );
			}
		}

		$tables = $this->getOption( 'tables' );
		if ( $tables !== null ) {
			$this->tables = explode( ',', $tables );
		}

		$prefix = $this->getOption( 'prefix' );
		if ( $prefix !== null ) {
			// Parse the prefix with a dummy page name appended, so that the
			// namespace part and the text part can be told apart.
			$title = Title::newFromText( $prefix . 'X' );
			if ( !$title || substr( $title->getDBkey(), -1 ) !== 'X' ) {
				$this->fatalError( 'Invalid --prefix.' );
			}
			if ( $title->getNamespace() <= NS_MAIN || $title->isExternal() ) {
				$this->fatalError( 'Invalid --prefix. It must not be in namespace 0 and must not be external' );
			}
			$this->prefixNs = $title->getNamespace();
			$this->prefix = substr( $title->getText(), 0, -1 );
		}
		$this->suffix = $this->getOption( 'suffix' );

		$this->reason = $this->getOption( 'reason' ) ?: $this->reason;
		$this->tags = (array)$this->getOption( 'tag', null );

		$charmapFile = $this->getOption( 'charmap' );
		if ( !file_exists( $charmapFile ) ) {
			$this->fatalError( "Charmap file $charmapFile does not exist." );
		}
		if ( !is_file( $charmapFile ) || !is_readable( $charmapFile ) ) {
			$this->fatalError( "Charmap file $charmapFile is not readable." );
		}
		$this->charmap = require $charmapFile;
		if ( !is_array( $this->charmap ) ) {
			$this->fatalError( "Charmap file $charmapFile did not return a PHP array." );
		}
		// Only single-character keys that really change the character are usable.
		$this->charmap = array_filter(
			$this->charmap,
			function ( $v, $k ) {
				if ( mb_strlen( $k ) !== 1 ) {
					$this->error( "Ignoring mapping from multi-character key '$k' to '$v'" );
					return false;
				}
				return $k !== $v;
			},
			ARRAY_FILTER_USE_BOTH
		);
		if ( !$this->charmap ) {
			$this->fatalError( "Charmap file $charmapFile did not contain any usable character mappings." );
		}

		// A dry run only reads, so a replica is sufficient.
		$db = $this->run ? $this->getPrimaryDB() : $this->getReplicaDB();

		// Process inplace moves first, before actual moves, so mungeTitle() doesn't get confused
		$this->processTable(
			$db, self::INPLACE_MOVE, 'archive', 'ar_namespace', 'ar_title', [ 'ar_timestamp', 'ar_id' ]
		);
		$this->processTable(
			$db, self::INPLACE_MOVE, 'filearchive', NS_FILE, 'fa_name', [ 'fa_timestamp', 'fa_id' ]
		);
		$this->processTable(
			$db, self::INPLACE_MOVE, 'logging', 'log_namespace', 'log_title', [ 'log_id' ]
		);
		$this->processTable(
			$db, self::INPLACE_MOVE, 'protected_titles', 'pt_namespace', 'pt_title', []
		);
		$this->processTable( $db, self::MOVE, 'page', 'page_namespace', 'page_title', [ 'page_id' ] );
		$this->processTable( $db, self::MOVE, 'image', NS_FILE, 'img_name', [] );
		$this->processTable(
			$db, self::UPPERCASE, 'redirect', 'rd_namespace', 'rd_title', [ 'rd_from' ]
		);
		$this->processUsers( $db );
	}

	/**
	 * Get batched LIKE conditions from the charmap
	 *
	 * Each returned element is an OR of "field LIKE '<char>%'" expressions
	 * covering at most $batchSize characters of the charmap.
	 *
	 * @param IReadableDatabase $db Database handle
	 * @param string $field Field name
	 * @param int $batchSize Size of the batches
	 * @return array One OR-ed condition per batch
	 */
	private function getLikeBatches( IReadableDatabase $db, $field, $batchSize = 100 ) {
		$ret = [];
		$likes = [];
		foreach ( $this->charmap as $from => $to ) {
			$likes[] = $db->expr(
				$field,
				IExpression::LIKE,
				new LikeValue( $from, $db->anyString() )
			);
			if ( count( $likes ) >= $batchSize ) {
				$ret[] = $db->orExpr( $likes );
				$likes = [];
			}
		}
		// Flush the final, partially filled batch.
		if ( $likes ) {
			$ret[] = $db->orExpr( $likes );
		}
		return $ret;
	}

	/**
	 * Get the list of namespaces to operate on
	 *
	 * We only care about namespaces where we can move pages and titles are
	 * capitalized. The list is sorted by subject namespace, with the subject
	 * namespace itself ahead of its non-subject counterpart, and is cached
	 * after the first call.
	 *
	 * @return int[]
	 */
	private function getNamespaces() {
		if ( $this->namespaces === null ) {
			$nsinfo = $this->getServiceContainer()->getNamespaceInfo();
			$this->namespaces = array_filter(
				array_keys( $nsinfo->getCanonicalNamespaces() ),
				static function ( $ns ) use ( $nsinfo ) {
					return $nsinfo->isMovable( $ns ) && $nsinfo->isCapitalized( $ns );
				}
			);
			usort( $this->namespaces, static function ( $ns1, $ns2 ) use ( $nsinfo ) {
				if ( $ns1 === $ns2 ) {
					return 0;
				}

				$s1 = $nsinfo->getSubject( $ns1 );
				$s2 = $nsinfo->getSubject( $ns2 );

				// Order by subject namespace number first
				if ( $s1 !== $s2 ) {
					return $s1 <=> $s2;
				}

				// Second, put subject namespaces before non-subject namespaces
				if ( $s1 === $ns1 ) {
					return -1;
				}
				if ( $s2 === $ns2 ) {
					return 1;
				}

				// Don't care about the relative order if there are somehow
				// multiple non-subject namespaces for a namespace.
				return 0;
			} );
		}

		return $this->namespaces;
	}

	/**
	 * Check if a ns+title is a registered user's page
	 *
	 * Subpages count as belonging to the user named by the part before the
	 * first slash. Lookups are cached per base name in $this->seenUsers.
	 *
	 * @param IReadableDatabase $db Database handle
	 * @param int $ns
	 * @param string $title
	 * @return bool
	 */
	private function isUserPage( IReadableDatabase $db, $ns, $title ) {
		if ( $ns !== NS_USER && $ns !== NS_USER_TALK ) {
			return false;
		}

		[ $base ] = explode( '/', $title, 2 );
		if ( !isset( $this->seenUsers[$base] ) ) {
			// Can't use User directly because it might uppercase the name
			$this->seenUsers[$base] = (bool)$db->newSelectQueryBuilder()
				->select( 'user_id' )
				->from( 'user' )
				->where( [ 'user_name' => strtr( $base, '_', ' ' ) ] )
				->caller( __METHOD__ )->fetchField();
		}
		return $this->seenUsers[$base];
	}

	/**
	 * Munge a target title, if necessary
	 *
	 * The target needs munging when it is the page of a registered user, or
	 * when MovePage reports that it already exists. In that case --prefix
	 * (preferred) or --suffix is applied; for files the suffix goes before
	 * the last dot of the name.
	 *
	 * @param IReadableDatabase $db Database handle
	 * @param Title $oldTitle
	 * @param Title &$newTitle Replaced by the munged title when munging happens
	 * @return bool If $newTitle is (now) ok
	 */
	private function mungeTitle( IReadableDatabase $db, Title $oldTitle, Title &$newTitle ) {
		// Remember the intended target for the error messages below.
		$nt = $newTitle->getPrefixedText();

		$munge = false;
		if ( $this->isUserPage( $db, $newTitle->getNamespace(), $newTitle->getText() ) ) {
			$munge = 'Target title\'s user exists';
		} else {
			$mpFactory = $this->getServiceContainer()->getMovePageFactory();
			$status = $mpFactory->newMovePage( $oldTitle, $newTitle )->isValidMove();
			if ( !$status->isOK() && (
				$status->hasMessage( 'articleexists' ) || $status->hasMessage( 'redirectexists' ) ) ) {
				$munge = 'Target title exists';
			}
		}
		if ( !$munge ) {
			return true;
		}

		if ( $this->prefix !== null ) {
			$newTitle = Title::makeTitle(
				$this->prefixNs,
				$this->prefix . $oldTitle->getPrefixedText() . ( $this->suffix ?? '' )
			);
		} elseif ( $this->suffix !== null ) {
			$dbkey = $newTitle->getText();
			// For files, keep the extension at the end of the name.
			$i = $newTitle->getNamespace() === NS_FILE ? strrpos( $dbkey, '.' ) : false;
			if ( $i !== false ) {
				$newTitle = Title::makeTitle(
					$newTitle->getNamespace(),
					substr( $dbkey, 0, $i ) . $this->suffix . substr( $dbkey, $i )
				);
			} else {
				$newTitle = Title::makeTitle( $newTitle->getNamespace(), $dbkey . $this->suffix );
			}
		} else {
			$this->error(
				"Cannot move {$oldTitle->getPrefixedText()} → $nt: "
				. "$munge and no --prefix or --suffix was given"
			);
			return false;
		}

		if ( !$newTitle->canExist() ) {
			$this->error(
				"Cannot move {$oldTitle->getPrefixedText()} → $nt: "
				. "$munge and munged title '{$newTitle->getPrefixedText()}' is not valid"
			);
			return false;
		}
		if ( $newTitle->exists() ) {
			$this->error(
				"Cannot move {$oldTitle->getPrefixedText()} → $nt: "
				. "$munge and munged title '{$newTitle->getPrefixedText()}' also exists"
			);
			return false;
		}

		return true;
	}

	/**
	 * Use MovePage to move a title
	 *
	 * In dry-run mode the move is validated and reported but not performed.
	 *
	 * @param IDatabase $db Database handle
	 * @param int $ns
	 * @param string $title
	 * @return bool|null True on success, false on error, null if skipped
	 */
	private function doMove( IDatabase $db, $ns, $title ) {
		$char = mb_substr( $title, 0, 1 );
		if ( !array_key_exists( $char, $this->charmap ) ) {
			$this->error(
				"Query returned NS$ns $title, which does not begin with a character in the charmap."
			);
			return false;
		}

		if ( $this->isUserPage( $db, $ns, $title ) ) {
			$this->output( "... Skipping user page NS$ns $title\n" );
			return null;
		}

		$uppercased = $this->charmap[$char] . mb_substr( $title, 1 );
		$oldTitle = Title::makeTitle( $ns, $title );
		$newTitle = Title::makeTitle( $ns, $uppercased );
		// Decide about deletion against the plain uppercased title, before
		// mungeTitle() possibly replaces $newTitle.
		$deletionReason = $this->shouldDelete( $db, $oldTitle, $newTitle );
		if ( !$this->mungeTitle( $db, $oldTitle, $newTitle ) ) {
			return false;
		}

		$services = $this->getServiceContainer();
		$mpFactory = $services->getMovePageFactory();
		$movePage = $mpFactory->newMovePage( $oldTitle, $newTitle );
		$status = $movePage->isValidMove();
		if ( !$status->isOK() ) {
			$this->error( "Invalid move {$oldTitle->getPrefixedText()} → {$newTitle->getPrefixedText()}:" );
			$this->error( $status );
			return false;
		}

		if ( !$this->run ) {
			$this->output(
				"Would rename {$oldTitle->getPrefixedText()} → {$newTitle->getPrefixedText()}\n"
			);
			if ( $deletionReason !== null ) {
				$this->output(
					"Would then delete {$newTitle->getPrefixedText()}: $deletionReason\n"
				);
			}
			return true;
		}

		$status = $movePage->move( $this->user, $this->reason, false, $this->tags );
		if ( !$status->isOK() ) {
			$this->error( "Move {$oldTitle->getPrefixedText()} → {$newTitle->getPrefixedText()} failed:" );
			$this->error( $status );
			// Stop here: nothing was renamed, so reporting success, touching
			// the log rows or deleting the target would all be wrong.
			return false;
		}
		$this->output( "Renamed {$oldTitle->getPrefixedText()} → {$newTitle->getPrefixedText()}\n" );

		// The move created a log entry under the old invalid title. Fix it.
		$db->newUpdateQueryBuilder()
			->update( 'logging' )
			->set( [
				'log_title' => $uppercased,
			] )
			->where( [
				'log_namespace' => $oldTitle->getNamespace(),
				'log_title' => $oldTitle->getDBkey(),
				'log_page' => $newTitle->getArticleID(),
			] )
			->caller( __METHOD__ )
			->execute();

		if ( $deletionReason !== null ) {
			$page = $services->getWikiPageFactory()->newFromTitle( $newTitle );
			$delPage = $services->getDeletePageFactory()->newDeletePage( $page, $this->user );
			$status = $delPage
				->forceImmediate( true )
				->deleteUnsafe( $deletionReason );
			if ( !$status->isOK() ) {
				$this->error( "Deletion of {$newTitle->getPrefixedText()} failed:" );
				$this->error( $status );
				return false;
			}
			$this->output( "Deleted {$newTitle->getPrefixedText()}\n" );
		}

		return true;
	}

	/**
	 * Determine whether the old title should be deleted
	 *
	 * If it's already a redirect to the new title, or the old and new titles
	 * are redirects to the same place, there's no point in keeping it.
	 *
	 * Note the caller will still rename it before deleting it, so the archive
	 * and logging rows wind up in a sensible place.
	 *
	 * @param IReadableDatabase $db
	 * @param Title $oldTitle
	 * @param Title $newTitle
	 * @return string|null Deletion reason, or null if it shouldn't be deleted
	 */
	private function shouldDelete( IReadableDatabase $db, Title $oldTitle, Title $newTitle ) {
		$oldRow = $db->newSelectQueryBuilder()
			->select( [ 'ns' => 'rd_namespace', 'title' => 'rd_title' ] )
			->from( 'page' )
			->join( 'redirect', null, 'rd_from = page_id' )
			->where( [ 'page_namespace' => $oldTitle->getNamespace(), 'page_title' => $oldTitle->getDBkey() ] )
			->caller( __METHOD__ )->fetchRow();
		if ( !$oldRow ) {
			// Not a redirect
			return null;
		}

		if ( (int)$oldRow->ns === $newTitle->getNamespace() &&
			$oldRow->title === $newTitle->getDBkey()
		) {
			return $this->reason . ", and found that [[{$oldTitle->getPrefixedText()}]] is "
				. "already a redirect to [[{$newTitle->getPrefixedText()}]]";
		}

		$newRow = $db->newSelectQueryBuilder()
			->select( [ 'ns' => 'rd_namespace', 'title' => 'rd_title' ] )
			->from( 'page' )
			->join( 'redirect', null, 'rd_from = page_id' )
			->where( [ 'page_namespace' => $newTitle->getNamespace(), 'page_title' => $newTitle->getDBkey() ] )
			->caller( __METHOD__ )->fetchRow();
		if ( $newRow && $oldRow->ns === $newRow->ns && $oldRow->title === $newRow->title ) {
			$nt = Title::makeTitle( (int)$newRow->ns, $newRow->title );
			return $this->reason . ", and found that [[{$oldTitle->getPrefixedText()}]] and "
				. "[[{$newTitle->getPrefixedText()}]] both redirect to [[{$nt->getPrefixedText()}]].";
		}

		return null;
	}

	/**
	 * Directly update a database row
	 *
	 * @param IDatabase $db Database handle
	 * @param int $op Operation to perform
	 *  - self::INPLACE_MOVE: Directly update the database table to move the page
	 *  - self::UPPERCASE: Rewrite the table to point to the new uppercase title
	 * @param string $table
	 * @param string|int $nsField Namespace field name, or a fixed namespace number
	 *  for tables that have no namespace field
	 * @param string $titleField
	 * @param stdClass $row
	 * @return bool|null True on success, false on error, null if skipped
	 */
	private function doUpdate( IDatabase $db, $op, $table, $nsField, $titleField, $row ) {
		$ns = is_int( $nsField ) ? $nsField : (int)$row->$nsField;
		$title = $row->$titleField;

		$char = mb_substr( $title, 0, 1 );
		if ( !array_key_exists( $char, $this->charmap ) ) {
			$r = json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
			$this->error(
				"Query returned $r, but title does not begin with a character in the charmap."
			);
			return false;
		}

		$oldTitle = Title::makeTitle( $ns, $title );
		$newTitle = Title::makeTitle( $ns, $this->charmap[$char] . mb_substr( $title, 1 ) );
		// UPPERCASE rows merely point at a title, so no conflict check is done for them.
		if ( $op !== self::UPPERCASE && !$this->mungeTitle( $db, $oldTitle, $newTitle ) ) {
			return false;
		}

		$r = json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
		if ( $this->run ) {
			$db->newUpdateQueryBuilder()
				->update( $table )
				->set( array_merge(
					is_int( $nsField ) ? [] : [ $nsField => $newTitle->getNamespace() ],
					[ $titleField => $newTitle->getDBkey() ]
				) )
				// The row holds exactly the selected fields, which identify it.
				->where( (array)$row )
				->caller( __METHOD__ )
				->execute();
			$this->output( "Set $r to {$newTitle->getPrefixedText()}\n" );
		} else {
			$this->output( "Would set $r to {$newTitle->getPrefixedText()}\n" );
		}

		return true;
	}

	/**
	 * Rename entries in other tables
	 *
	 * @param IDatabase $db Database handle
	 * @param int $op Operation to perform
	 *  - self::MOVE: Use MovePage to move the page
	 *  - self::INPLACE_MOVE: Directly update the database table to move the page
	 *  - self::UPPERCASE: Rewrite the table to point to the new uppercase title
	 * @param string $table
	 * @param string|int $nsField Namespace field name, or a fixed namespace number
	 *  for tables that have no namespace field
	 * @param string $titleField
	 * @param string[] $pkFields Additional fields to match a unique index
	 *  starting with $nsField and $titleField.
	 */
	private function processTable( IDatabase $db, $op, $table, $nsField, $titleField, $pkFields ) {
		if ( $this->tables !== null && !in_array( $table, $this->tables, true ) ) {
			$this->output( "Skipping table `$table`, not in --tables.\n" );
			return;
		}

		$batchSize = $this->getBatchSize();
		$namespaces = $this->getNamespaces();
		$likes = $this->getLikeBatches( $db, $titleField );

		if ( is_int( $nsField ) ) {
			$namespaces = array_intersect( $namespaces, [ $nsField ] );
		}

		if ( !$namespaces ) {
			$this->output( "Skipping table `$table`, no valid namespaces.\n" );
			return;
		}

		$this->output( "Processing table `$table`...\n" );

		$selectFields = array_merge(
			is_int( $nsField ) ? [] : [ $nsField ],
			[ $titleField ],
			$pkFields
		);
		// Fields used both for ordering and as the paging cursor.
		$contFields = array_merge( [ $titleField ], $pkFields );

		$count = 0;
		$errors = 0;
		foreach ( $namespaces as $ns ) {
			foreach ( $likes as $like ) {
				$cont = [];
				do {
					$res = $db->newSelectQueryBuilder()
						->select( $selectFields )
						->from( $table )
						// With a fixed namespace number this condition reads "N = N" and is always true.
						->where( [ "$nsField = $ns", $like, $cont ? $db->buildComparison( '>', $cont ) : '1=1' ] )
						->orderBy( $contFields )
						->limit( $batchSize )
						->caller( __METHOD__ )->fetchResultSet();

					// Stays empty when no rows come back, which ends the paging loop.
					$cont = [];
					$this->beginTransactionRound( __METHOD__ );
					foreach ( $res as $row ) {
						$cont = [];
						foreach ( $contFields as $field ) {
							$cont[ $field ] = $row->$field;
						}

						if ( $op === self::MOVE ) {
							$rowNs = is_int( $nsField ) ? $nsField : (int)$row->$nsField;
							$ret = $this->doMove( $db, $rowNs, $row->$titleField );
						} else {
							$ret = $this->doUpdate( $db, $op, $table, $nsField, $titleField, $row );
						}
						if ( $ret === true ) {
							$count++;
						} elseif ( $ret === false ) {
							$errors++;
						}
					}
					$this->commitTransactionRound( __METHOD__ );

					if ( $this->run ) {
						// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
						$r = $cont ? json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE ) : '<end>';
						$this->output( "... $table: $count renames, $errors errors at $r\n" );
					}
				} while ( $cont );
			}
		}

		$this->output( "Done processing table `$table`.\n" );
	}

	/**
	 * List users needing renaming
	 *
	 * Appends one tab-separated line (wiki ID, user ID, new name) per affected
	 * user to the file given by --userlist. Does nothing but print a notice
	 * when that option is absent.
	 *
	 * @param IReadableDatabase $db Database handle
	 */
	private function processUsers( IReadableDatabase $db ) {
		$userlistFile = $this->getOption( 'userlist' );
		if ( $userlistFile === null ) {
			$this->output( "Not generating user list, --userlist was not specified.\n" );
			return;
		}

		$fh = fopen( $userlistFile, 'ab' );
		if ( !$fh ) {
			$this->error( "Could not open user list file $userlistFile" );
			return;
		}

		$this->output( "Generating user list...\n" );
		$count = 0;
		$batchSize = $this->getBatchSize();
		foreach ( $this->getLikeBatches( $db, 'user_name' ) as $like ) {
			$cont = [];
			while ( true ) {
				$rows = $db->newSelectQueryBuilder()
					->select( [ 'user_id', 'user_name' ] )
					->from( 'user' )
					->where( $like )
					->andWhere( $cont )
					->orderBy( 'user_name' )
					->limit( $batchSize )
					->caller( __METHOD__ )->fetchResultSet();

				if ( !$rows->numRows() ) {
					break;
				}

				foreach ( $rows as $row ) {
					// Advance the cursor for every row, including the ones skipped
					// below. Otherwise a batch made up of skipped rows would be
					// fetched again and again.
					$cont = [ $db->expr( 'user_name', '>', $row->user_name ) ];

					$char = mb_substr( $row->user_name, 0, 1 );
					if ( !array_key_exists( $char, $this->charmap ) ) {
						$this->error(
							"Query returned $row->user_name, but user name does not " .
							"begin with a character in the charmap."
						);
						continue;
					}
					$newName = $this->charmap[$char] . mb_substr( $row->user_name, 1 );
					fprintf( $fh, "%s\t%s\t%s\n", WikiMap::getCurrentWikiId(), $row->user_id, $newName );
					$count++;
				}
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
				$this->output( "... at $row->user_name, $count names so far\n" );
			}
		}

		if ( !fclose( $fh ) ) {
			$this->error( "fclose on $userlistFile failed" );
		}
		$this->output( "User list output to $userlistFile, $count users need renaming.\n" );
	}
}

// @codeCoverageIgnoreStart
// Name the class to run, then include the runner file.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is presumably defined by the
// Maintenance.php required at the top of this file — confirm.
$maintClass = UppercaseTitlesForUnicodeTransition::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd