Merge "docs: Fix typo"
[mediawiki.git] / maintenance / deleteSelfExternals.php
blobe1f625095e687711da6a74eb665738be2cbe96c6
1 <?php
2 /**
3 * Delete self-references to $wgServer from the externallinks table.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Maintenance
24 use MediaWiki\ExternalLinks\LinkFilter;
25 use MediaWiki\MainConfigNames;
26 use MediaWiki\Maintenance\Maintenance;
28 // @codeCoverageIgnoreStart
29 require_once __DIR__ . '/Maintenance.php';
30 // @codeCoverageIgnoreEnd
/**
 * Maintenance script that deletes self-references to $wgServer
 * from the externallinks table.
 *
 * Rows are removed in batches of el_id ranges, waiting for replication
 * between batches.
 *
 * @ingroup Maintenance
 */
class DeleteSelfExternals extends Maintenance {
	/**
	 * Register the script description and the default batch size (1000 ids per batch).
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Delete self-references to $wgServer from externallinks' );
		$this->setBatchSize( 1000 );
	}

	/**
	 * Delete every externallinks row whose link points at this wiki's own $wgServer.
	 *
	 * The whole el_id range (from 0 up to the current MAX(el_id)) is walked in
	 * windows of the configured batch size. A window that happens to contain no
	 * matching rows does not end the run; only reaching the maximum id does.
	 */
	public function execute() {
		// Extract the host and scheme from $wgServer
		$server = $this->getConfig()->get( MainConfigNames::Server );
		$bits = $this->getServiceContainer()->getUrlUtils()->parse( $server );
		if ( !$bits ) {
			$this->fatalError( 'Could not parse $wgServer' );
		}

		// A non-positive batch size would make the id window never advance.
		$batchSize = (int)$this->getBatchSize();
		if ( $batchSize < 1 ) {
			$this->fatalError( 'Batch size must be a positive integer' );
		}

		$this->output( "Deleting self externals from $server\n" );
		$db = $this->getPrimaryDB();

		// If it's protocol-relative, we need to do both http and https.
		// Otherwise, just do the specified scheme.
		$host = $bits['host'];
		if ( isset( $bits['port'] ) ) {
			$host .= ':' . $bits['port'];
		}
		$scheme = $bits['scheme'] ?? '';
		if ( $scheme !== '' ) {
			$protocols = [ $scheme . '://' ];
		} else {
			$protocols = [ 'http://', 'https://' ];
		}
		$conds = [];
		foreach ( $protocols as $protocol ) {
			$conds[] = LinkFilter::getQueryConditions( $host, [ 'protocol' => $protocol ] );
		}

		// Convert the array of $conds into an IExpression object for use in the DELETE query.
		// The use of array_filter is just there for a sanity check, as LinkFilter::getQueryConditions
		// only returns false if the host was invalid (we have already validated this above).
		$conds = array_values( array_filter( $conds ) );
		if ( !$conds ) {
			// Never build a DELETE that is not restricted to this wiki's own domain.
			$this->fatalError( 'Could not build link conditions for $wgServer' );
		}
		$domainExpr = $db->orExpr( array_map(
			static fn ( $cond ) => $db->andExpr( $cond ),
			$conds
		) );

		// Upper bound for the id walk. MAX() yields NULL on an empty table, which the
		// cast turns into 0 so that the loop below simply does not run.
		$maxId = (int)$db->newSelectQueryBuilder()
			->select( 'MAX(el_id)' )
			->from( 'externallinks' )
			->caller( __METHOD__ )
			->fetchField();

		$totalRows = 0;
		for ( $batchStart = 0; $batchStart < $maxId; $batchStart += $batchSize ) {
			$batchEnd = $batchStart + $batchSize;
			$this->output( "Deleting self-externals with el_id $batchStart to $batchEnd\n" );

			$db->newDeleteQueryBuilder()
				->deleteFrom( 'externallinks' )
				->where( $domainExpr )
				->andWhere( $db->expr( 'el_id', '>', $batchStart ) )
				->andWhere( $db->expr( 'el_id', '<=', $batchEnd ) )
				->caller( __METHOD__ )
				->execute();
			$totalRows += $db->affectedRows();

			// Let replicas catch up before issuing the next batch.
			$this->waitForReplication();
		}

		$this->output( "done; deleted $totalRows rows\n" );
	}
}
104 // @codeCoverageIgnoreStart
105 $maintClass = DeleteSelfExternals::class;
106 require_once RUN_MAINTENANCE_IF_MAIN;
107 // @codeCoverageIgnoreEnd