Sync up with Parsoid parserTests.txt
[mediawiki.git] / includes / ActorMigrationBase.php
blob5a278ec50c4566f9f11fed4119c3ac4e8d5270dd
1 <?php
2 /**
3 * Methods to help with the actor table migration
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
23 use MediaWiki\User\ActorStoreFactory;
24 use MediaWiki\User\UserIdentity;
25 use Wikimedia\IPUtils;
26 use Wikimedia\Rdbms\IDatabase;
28 /**
29 * This abstract base class helps migrate core and extension code to use the
30 * actor table.
32 * @stable to extend
33 * @since 1.37
35 class ActorMigrationBase {
36 /** @var array[] Cache for `self::getJoin()` */
37 private $joinCache = [];
39 /** @var int One of the SCHEMA_COMPAT_READ_* values */
40 private $readStage;
42 /** @var int A combination of the SCHEMA_COMPAT_WRITE_* flags */
43 private $writeStage;
45 /** @var ActorStoreFactory */
46 private $actorStoreFactory;
48 /** @var array */
49 private $fieldInfos;
51 /** @var bool */
52 private $allowUnknown;
/**
 * Store the field configuration and validate the requested migration stage.
 *
 * @stable to override
 * @stable to call
 *
 * @param array $fieldInfos An array of associative arrays, giving configuration
 *  information about fields which are being migrated. Subkeys are:
 *  - removedVersion: The version in which the field was removed
 *  - deprecatedVersion: The version in which the field was deprecated
 *  - component: The component for removedVersion and deprecatedVersion.
 *    Default: MediaWiki.
 *  - textField: Override the old text field name. Default {$key}_text.
 *  - actorField: Override the actor field name. Default: $key with its last
 *    five characters (normally the "_user" suffix) replaced by "_actor",
 *    e.g. "rev_user" becomes "rev_actor".
 *  - tempTable: An array of information about the temp table linking
 *    the old table to the actor table. Default: no temp table is used.
 *    If set, the following subkeys must be present:
 *    - table: Temporary table name
 *    - pk: Temporary table column referring to the main table's primary key
 *    - field: Temporary table column referring actor.actor_id
 *    - joinPK: Main table's primary key
 *    - extra: An array of extra field names to be copied into the
 *      temp table for indexing. The key is the field name in the temp
 *      table, and the value is the field name in the main table.
 *  - formerTempTableVersion: The version of the component in which this
 *    field used a temp table. If present, getInsertValuesWithTempTable()
 *    still works, but issues a deprecation warning.
 *  All subkeys are optional.
 *
 * @param int $stage The migration stage. This is a combination of
 *  SCHEMA_COMPAT_* flags:
 *  - SCHEMA_COMPAT_READ_OLD, SCHEMA_COMPAT_WRITE_OLD: Use the old schema,
 *    with *_user and *_user_text fields.
 *  - SCHEMA_COMPAT_READ_TEMP, SCHEMA_COMPAT_WRITE_TEMP: Use the new schema,
 *    with an actor table. Normal tables are joined via a *_actor field,
 *    whereas temp tables are joined to the actor table via an
 *    intermediate table.
 *  - SCHEMA_COMPAT_READ_NEW, SCHEMA_COMPAT_WRITE_NEW: Use the new
 *    schema. Former temp tables are no longer used, and all relevant
 *    tables join directly to the actor table.
 *
 * @param ActorStoreFactory $actorStoreFactory
 * @param array $options Array of other options. May contain:
 *  - allowUnknown: Allow fields not present in $fieldInfos. True by default.
 *
 * @throws InvalidArgumentException If $stage has no write flag, has no read
 *  flag, has a read part that is not exactly one of the three
 *  SCHEMA_COMPAT_READ_* values, or reads a schema that is not also written.
 */
public function __construct(
	$fieldInfos,
	$stage,
	ActorStoreFactory $actorStoreFactory,
	$options = []
) {
	$this->fieldInfos = $fieldInfos;
	$this->allowUnknown = $options['allowUnknown'] ?? true;

	// Split the combined stage into its write part and its read part.
	$writeStage = $stage & SCHEMA_COMPAT_WRITE_MASK;
	$readStage = $stage & SCHEMA_COMPAT_READ_MASK;
	if ( $writeStage === 0 ) {
		throw new InvalidArgumentException( '$stage must include a write mode' );
	}
	if ( $readStage === 0 ) {
		throw new InvalidArgumentException( '$stage must include a read mode' );
	}
	// Strict comparison, consistent with the === checks around it: the read
	// part must be exactly one of the three single-schema values.
	if ( !in_array(
		$readStage,
		[ SCHEMA_COMPAT_READ_OLD, SCHEMA_COMPAT_READ_TEMP, SCHEMA_COMPAT_READ_NEW ],
		true
	) ) {
		throw new InvalidArgumentException( 'Cannot read multiple schemas' );
	}
	// Whatever schema is read must also be kept up to date by writes.
	if ( $readStage === SCHEMA_COMPAT_READ_OLD && !( $writeStage & SCHEMA_COMPAT_WRITE_OLD ) ) {
		throw new InvalidArgumentException( 'Cannot read the old schema without also writing it' );
	}
	if ( $readStage === SCHEMA_COMPAT_READ_TEMP && !( $writeStage & SCHEMA_COMPAT_WRITE_TEMP ) ) {
		throw new InvalidArgumentException( 'Cannot read the temp schema without also writing it' );
	}
	if ( $readStage === SCHEMA_COMPAT_READ_NEW && !( $writeStage & SCHEMA_COMPAT_WRITE_NEW ) ) {
		throw new InvalidArgumentException( 'Cannot read the new schema without also writing it' );
	}
	$this->readStage = $readStage;
	$this->writeStage = $writeStage;

	$this->actorStoreFactory = $actorStoreFactory;
}
/**
 * Look up the configuration array registered for a field key.
 *
 * @stable to override
 *
 * @param string $key
 * @return array The configured info, or an empty array for an unconfigured
 *  key when unknown keys are allowed.
 * @throws InvalidArgumentException If the key is not configured and unknown
 *  keys are not allowed.
 */
protected function getFieldInfo( $key ) {
	$info = $this->fieldInfos[$key] ?? null;
	if ( $info !== null ) {
		return $info;
	}

	if ( !$this->allowUnknown ) {
		throw new InvalidArgumentException( $this->getInstanceName() . ": unknown key $key" );
	}

	// Unknown keys are tolerated: fall back to "no special configuration".
	return [];
}
/**
 * Produce the class name used to identify this instance in error messages.
 *
 * @stable to override
 *
 * @return string
 * @throws ReflectionException
 */
protected function getInstanceName() {
	$isAnonymousClass = ( new ReflectionClass( $this ) )->isAnonymous();

	// Anonymous subclasses (mostly seen in PHPUnit) are reported under this
	// base class's name; everything else reports its own runtime class.
	return $isAnonymousClass ? self::class : static::class;
}
/**
 * Raise the error or deprecation warning configured for a field key, if any.
 *
 * @internal
 *
 * @param string $key
 * @throws InvalidArgumentException If the field is configured with a removedVersion.
 */
protected function checkDeprecation( $key ) {
	$info = $this->getFieldInfo( $key );
	$removedIn = $info['removedVersion'] ?? null;
	$deprecatedIn = $info['deprecatedVersion'] ?? null;

	if ( $removedIn === null && $deprecatedIn === null ) {
		return;
	}

	$component = $info['component'] ?? 'MediaWiki';

	// Removal takes precedence over deprecation.
	if ( $removedIn !== null ) {
		throw new InvalidArgumentException(
			"Use of {$this->getInstanceName()} for '$key' was removed in $component $removedIn"
		);
	}

	// The caller offset of 3 is kept from the original call, so this must
	// remain a direct call from this method.
	wfDeprecated( "{$this->getInstanceName()} for '$key'", $deprecatedIn, $component, 3 );
}
/**
 * Build an SQL condition that matches an anonymous user field.
 *
 * The condition tests for NULL when the read stage is numerically at least
 * SCHEMA_COMPAT_READ_TEMP, and for 0 otherwise.
 *
 * @param string $field Field name or SQL fragment
 * @return string
 */
public function isAnon( $field ) {
	if ( $this->readStage >= SCHEMA_COMPAT_READ_TEMP ) {
		return "$field IS NULL";
	}

	return "$field = 0";
}
/**
 * Build an SQL condition that matches a non-anonymous user field.
 *
 * The condition tests for NOT NULL when the read stage is numerically at
 * least SCHEMA_COMPAT_READ_TEMP, and for a non-zero value otherwise.
 *
 * @param string $field Field name or SQL fragment
 * @return string
 */
public function isNotAnon( $field ) {
	if ( $this->readStage >= SCHEMA_COMPAT_READ_TEMP ) {
		return "$field IS NOT NULL";
	}

	return "$field != 0";
}
/**
 * Resolve the text and actor column names that belong to an actor key.
 *
 * @param string $key A key such as "rev_user" identifying the actor
 *  field being fetched.
 * @return string[] [ $text, $actor ]
 */
private function getFieldNames( $key ) {
	$info = $this->getFieldInfo( $key );

	$textName = $info['textField'] ?? "{$key}_text";

	if ( isset( $info['actorField'] ) ) {
		$actorName = $info['actorField'];
	} else {
		// Drop the key's last five characters (the "_user" of "rev_user")
		// before appending the actor suffix.
		$actorName = substr( $key, 0, -5 ) . '_actor';
	}

	return [ $textName, $actorName ];
}
/**
 * Fetch the "tempTable" configuration of a field, if it has one.
 *
 * @param string $key
 * @return array|null Null when the field has no temp table configured.
 */
private function getTempTableInfo( $key ) {
	return $this->getFieldInfo( $key )['tempTable'] ?? null;
}
/**
 * Build (and memoize per key) the SELECT fields and joins for an actor key.
 *
 * @param string $key A key such as "rev_user" identifying the actor
 *  field being fetched.
 * @return array[] With three keys:
 *  - tables: (string[]) to include in the `$table` to `IDatabase->select()` or `SelectQueryBuilder::tables`
 *  - fields: (string[]) to include in the `$vars` to `IDatabase->select()` or `SelectQueryBuilder::fields`
 *  - joins: (array) to include in the `$join_conds` to `IDatabase->select()` or `SelectQueryBuilder::joinConds`
 *  All tables, fields, and joins are aliased, so `+` is safe to use.
 * @phan-return array{tables:string[],fields:string[],joins:array}
 */
public function getJoin( $key ) {
	$this->checkDeprecation( $key );

	if ( isset( $this->joinCache[$key] ) ) {
		return $this->joinCache[$key];
	}

	list( $textField, $actorField ) = $this->getFieldNames( $key );
	$tables = [];
	$joins = [];

	if ( $this->readStage === SCHEMA_COMPAT_READ_OLD ) {
		// Old schema: select the legacy columns as-is, with no actor ID.
		$fields = [
			$key => $key,
			$textField => $textField,
			$actorField => 'NULL',
		];
	} else {
		// By default the actor table is joined on the main table's actor column.
		$joinField = $actorField;

		if ( $this->readStage === SCHEMA_COMPAT_READ_TEMP ) {
			$tempTableInfo = $this->getTempTableInfo( $key );
			if ( $tempTableInfo ) {
				// Temp-table fields reach the actor ID through the intermediate
				// table, which has to be joined before the actor table.
				$tempAlias = "temp_$key";
				$tables[$tempAlias] = $tempTableInfo['table'];
				$joins[$tempAlias] = [
					'JOIN',
					"{$tempAlias}.{$tempTableInfo['pk']} = {$tempTableInfo['joinPK']}"
				];
				$joinField = "{$tempAlias}.{$tempTableInfo['field']}";
			}
		}

		$actorAlias = "actor_$key";
		$tables[$actorAlias] = 'actor';
		$joins[$actorAlias] = [ 'JOIN', "{$actorAlias}.actor_id = {$joinField}" ];

		$fields = [
			$key => "{$actorAlias}.actor_user",
			$textField => "{$actorAlias}.actor_name",
			$actorField => $joinField,
		];
	}

	$this->joinCache[$key] = [
		'tables' => $tables,
		'fields' => $fields,
		'joins' => $joins,
	];

	return $this->joinCache[$key];
}
/**
 * Build the column values describing a user for an insert or update, for
 * fields that have no temp table.
 *
 * @param IDatabase $dbw Database to use for creating an actor ID, if necessary
 * @param string $key A key such as "rev_user" identifying the actor
 *  field being fetched.
 * @param UserIdentity $user User to set in the update
 * @return array to merge into `$values` to `IDatabase->update()` or `$a` to `IDatabase->insert()`
 * @throws InvalidArgumentException If the field is configured with a temp table.
 */
public function getInsertValues( IDatabase $dbw, $key, UserIdentity $user ) {
	$this->checkDeprecation( $key );

	if ( $this->getTempTableInfo( $key ) ) {
		throw new InvalidArgumentException( "Must use getInsertValuesWithTempTable() for $key" );
	}

	list( $textField, $actorField ) = $this->getFieldNames( $key );
	$values = [];

	// Legacy columns: user ID and user name.
	if ( $this->writeStage & SCHEMA_COMPAT_WRITE_OLD ) {
		$values[$key] = $user->getId();
		$values[$textField] = $user->getName();
	}

	// Actor column: written for both the TEMP and the NEW write stage.
	$actorStages = SCHEMA_COMPAT_WRITE_TEMP | SCHEMA_COMPAT_WRITE_NEW;
	if ( $this->writeStage & $actorStages ) {
		$normalization = $this->actorStoreFactory->getActorNormalization( $dbw->getDomainID() );
		$values[$actorField] = $normalization->acquireActorId( $user, $dbw );
	}

	return $values;
}
/**
 * Build the column values describing a user for an insert or update, for
 * fields that use (or formerly used) a temp table.
 *
 * @param IDatabase $dbw Database to use for creating an actor ID, if necessary
 * @param string $key A key such as "rev_user" identifying the actor
 *  field being fetched.
 * @param UserIdentity $user User to set in the update
 * @return array with two values:
 *  - array to merge into `$values` to `IDatabase->update()` or `$a` to `IDatabase->insert()`
 *  - callback to call with the primary key for the main table insert
 *    and extra fields needed for the temp table.
 * @throws InvalidArgumentException If the field has neither a temp table nor
 *  a formerTempTableVersion configured.
 */
public function getInsertValuesWithTempTable( IDatabase $dbw, $key, UserIdentity $user ) {
	$this->checkDeprecation( $key );

	// Captured here because __METHOD__ would not name this method from
	// inside the closures below.
	$method = __METHOD__;

	$info = $this->getFieldInfo( $key );
	$tempTableInfo = $info['tempTable'] ?? null;
	if ( isset( $info['formerTempTableVersion'] ) ) {
		wfDeprecated( __METHOD__ . " for $key",
			$info['formerTempTableVersion'],
			$info['component'] ?? 'MediaWiki' );
	} elseif ( !$tempTableInfo ) {
		throw new InvalidArgumentException( "Must use getInsertValues() for $key" );
	}

	list( $textField, $actorField ) = $this->getFieldNames( $key );
	$values = [];
	$callback = null;

	if ( $this->writeStage & SCHEMA_COMPAT_WRITE_OLD ) {
		$values[$key] = $user->getId();
		$values[$textField] = $user->getName();
	}

	if ( $this->writeStage & ( SCHEMA_COMPAT_WRITE_TEMP | SCHEMA_COMPAT_WRITE_NEW ) ) {
		$actorId = $this->actorStoreFactory
			->getActorNormalization( $dbw->getDomainID() )
			->acquireActorId( $user, $dbw );

		// The main table's actor column is filled when there is no temp
		// table at all, or when the NEW schema is being written.
		if ( !$tempTableInfo || ( $this->writeStage & SCHEMA_COMPAT_WRITE_NEW ) ) {
			$values[$actorField] = $actorId;
		}

		// The temp table row is written by the callback, once the caller
		// knows the main table's primary key.
		if ( $tempTableInfo && ( $this->writeStage & SCHEMA_COMPAT_WRITE_TEMP ) ) {
			$callback = static function ( $pk, array $extra ) use ( $tempTableInfo, $dbw, $actorId, $method ) {
				$row = [ $tempTableInfo['field'] => $actorId ];
				foreach ( $tempTableInfo['extra'] as $target => $source ) {
					if ( !array_key_exists( $source, $extra ) ) {
						throw new InvalidArgumentException( "$method callback: \$extra[$source] is not provided" );
					}
					$row[$target] = $extra[$source];
				}
				$dbw->upsert(
					$tempTableInfo['table'],
					[ $tempTableInfo['pk'] => $pk ] + $row,
					[ [ $tempTableInfo['pk'] ] ],
					$row,
					$method
				);
			};
		}
	}

	if ( $callback === null ) {
		if ( $tempTableInfo ) {
			// Nothing to write, but still check that the caller supplies
			// every extra field the temp table configuration names.
			$callback = static function ( $pk, array $extra ) use ( $tempTableInfo, $method ) {
				foreach ( $tempTableInfo['extra'] as $source ) {
					if ( !array_key_exists( $source, $extra ) ) {
						throw new InvalidArgumentException( "$method callback: \$extra[$source] is not provided" );
					}
				}
			};
		} else {
			// No temp table information: the callback does nothing.
			$callback = static function ( $pk, array $extra ) {
			};
		}
	}

	return [ $values, $callback ];
}
/**
 * Get WHERE condition for the actor
 *
 * Only the data the current read stage needs is gathered: user IDs and names
 * when reading the old schema, actor IDs (one findActorId() lookup per user)
 * when reading the temp or new schema.
 *
 * @param IDatabase $db Database to use for quoting and list-making
 * @param string $key A key such as "rev_user" identifying the actor
 *  field being fetched.
 * @param UserIdentity|UserIdentity[]|null|false $users Users to test for.
 *  Passing null, false, or the empty array will return 'conds' that never match,
 *  and an empty array for 'orconds'.
 * @param bool $useId If false, don't try to query by the user ID.
 *  Intended for use with rc_user since it has an index on
 *  (rc_user_text,rc_timestamp) but not (rc_user,rc_timestamp).
 * @return array With four keys:
 *  - tables: (string[]) to include in the `$table` to `IDatabase->select()` or `SelectQueryBuilder::tables`
 *  - conds: (string) to include in the `$cond` to `IDatabase->select()` or `SelectQueryBuilder::conds`
 *  - orconds: (array[]) array of alternatives in case a union of multiple
 *    queries would be more efficient than a query with OR. May have keys
 *    'newactor' (reading the new schema), 'actor' (reading the temp schema),
 *    'userid' and 'username' (reading the old schema).
 *    Since 1.32, this is guaranteed to contain just one alternative if
 *    $users contains a single user.
 *  - joins: (array) to include in the `$join_conds` to `IDatabase->select()` or `SelectQueryBuilder::joinConds`
 *  All tables and joins are aliased, so `+` is safe to use.
 * @phan-return array{tables:string[],conds:string,orconds:array[],joins:array}
 * @throws InvalidArgumentException If $users is not a UserIdentity, an array,
 *  null or false.
 */
public function getWhere( IDatabase $db, $key, $users, $useId = true ) {
	$this->checkDeprecation( $key );

	$tables = [];
	$conds = [];
	$joins = [];

	if ( $users instanceof UserIdentity ) {
		$users = [ $users ];
	} elseif ( $users === null || $users === false ) {
		// DWIM
		$users = [];
	} elseif ( !is_array( $users ) ) {
		$what = is_object( $users ) ? get_class( $users ) : gettype( $users );
		throw new InvalidArgumentException(
			__METHOD__ . ": Value for \$users must be a UserIdentity or array, got $what"
		);
	}

	list( $text, $actor ) = $this->getFieldNames( $key );

	$readsNew = $this->readStage === SCHEMA_COMPAT_READ_NEW;
	$readsTemp = $this->readStage === SCHEMA_COMPAT_READ_TEMP;

	if ( $readsNew || $readsTemp ) {
		// Actor-based schemas: only actor IDs matter, so user IDs and names
		// are not collected.
		$actors = [];
		if ( $users ) {
			$actorNormalization = $this->actorStoreFactory
				->getActorNormalization( $db->getDomainID() );
			foreach ( $users as $user ) {
				$actorId = $actorNormalization->findActorId( $user, $db );
				if ( $actorId ) {
					$actors[] = $actorId;
				}
			}
		}

		if ( $actors ) {
			if ( $readsNew ) {
				$conds['newactor'] = $db->makeList( [ $actor => $actors ], IDatabase::LIST_AND );
			} else {
				$tempTableInfo = $this->getTempTableInfo( $key );
				if ( $tempTableInfo ) {
					// The actor ID lives in the temp table, which must be joined.
					$alias = "temp_$key";
					$tables[$alias] = $tempTableInfo['table'];
					$joins[$alias] = [ 'JOIN',
						"{$alias}.{$tempTableInfo['pk']} = {$tempTableInfo['joinPK']}" ];
					$joinField = "{$alias}.{$tempTableInfo['field']}";
				} else {
					$joinField = $actor;
				}
				$conds['actor'] = $db->makeList( [ $joinField => $actors ], IDatabase::LIST_AND );
			}
		}
	} else {
		// Old schema: match on user ID and/or user name; no actor lookup is
		// performed because its result would not be used.
		$ids = [];
		$names = [];
		foreach ( $users as $user ) {
			if ( $useId && $user->isRegistered() ) {
				$ids[] = $user->getId();
			} else {
				// make sure to use normalized form of IP for anonymous users
				$names[] = IPUtils::sanitizeIP( $user->getName() );
			}
		}

		if ( $ids ) {
			$conds['userid'] = $db->makeList( [ $key => $ids ], IDatabase::LIST_AND );
		}
		if ( $names ) {
			$conds['username'] = $db->makeList( [ $text => $names ], IDatabase::LIST_AND );
		}
	}

	return [
		'tables' => $tables,
		// With no alternatives at all, return a condition that never matches.
		'conds' => $conds ? $db->makeList( array_values( $conds ), IDatabase::LIST_OR ) : '1=0',
		'orconds' => $conds,
		'joins' => $joins,
	];
}