<?php
/**
 * Refresh image metadata fields. See also rebuildImages.php
 *
 * Usage: php refreshImageMetadata.php
 *
 * Copyright © 2011 Brian Wolff
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @ingroup Maintenance
 */

// Load Maintenance.php from this script's own directory.
// @codeCoverageIgnoreStart
require_once __DIR__ . '/Maintenance.php';
// @codeCoverageIgnoreEnd

use MediaWiki\FileRepo\File\FileSelectQueryBuilder;
use MediaWiki\Maintenance\Maintenance;
use Wikimedia\Rdbms\IExpression;
use Wikimedia\Rdbms\IMaintainableDatabase;
use Wikimedia\Rdbms\IReadableDatabase;
use Wikimedia\Rdbms\LikeValue;
use Wikimedia\Rdbms\SelectQueryBuilder;

/**
 * Maintenance script to refresh image metadata fields.
 *
 * Selects file rows in batches ordered by name (current files by default,
 * old file versions with --oldimage), asks each file object to upgrade its
 * metadata row, and prints a summary of how many rows were refreshed, left
 * alone, or failed.
 *
 * @ingroup Maintenance
 */
class RefreshImageMetadata extends Maintenance {

	/**
	 * Not assigned anywhere in this class; execute() works with a local handle.
	 *
	 * @var IMaintainableDatabase
	 */
	protected $dbw;

	/**
	 * Declare the script description, the default batch size and all
	 * command-line options.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Script to update image metadata records' );
		$this->setBatchSize( 200 );

		$this->addOption(
			'force',
			'Reload metadata from file even if the metadata looks ok',
			false,
			false,
			'f'
		);
		$this->addOption(
			'broken-only',
			'Only fix really broken records, leave old but still compatible records alone.'
		);
		$this->addOption(
			'convert-to-json',
			'Fix records with an out of date serialization format.'
		);
		$this->addOption(
			'split',
			'Enable splitting out large metadata items to the text table. Implies --convert-to-json.'
		);
		$this->addOption(
			'verbose',
			'Output extra information about each upgraded/non-upgraded file.',
			false,
			false,
			'v'
		);
		$this->addOption( 'start', 'Name of file to start with', false, true );
		$this->addOption( 'end', 'Name of file to end with', false, true );

		$this->addOption(
			'mediatype',
			'Only refresh files with this media type, e.g. BITMAP, UNKNOWN etc.',
			false,
			true
		);
		$this->addOption(
			'mime',
			"Only refresh files with this MIME type. Can accept wild-card 'image/*'. "
				. "Potentially inefficient unless 'mediatype' is also specified",
			false,
			true
		);
		$this->addOption(
			'metadata-contains',
			'(Inefficient!) Only refresh files where the img_metadata field '
				. 'contains this string. Can be used if its known a specific '
				. 'property was being extracted incorrectly.',
			false,
			true
		);
		$this->addOption(
			'sleep',
			'Time to sleep between each batch (in seconds). Default: 0',
			false,
			true
		);
		$this->addOption( 'oldimage', 'Run and refresh on oldimage table.' );
	}

	/**
	 * Run the refresh: page through the selected rows in name order, upgrade
	 * each file, then print the totals.
	 */
	public function execute() {
		$force = $this->hasOption( 'force' );
		$brokenOnly = $this->hasOption( 'broken-only' );
		$verbose = $this->hasOption( 'verbose' );
		$start = $this->getOption( 'start', false );
		$split = $this->hasOption( 'split' );
		$sleep = (int)$this->getOption( 'sleep', 0 );
		$reserialize = $this->hasOption( 'convert-to-json' );
		$oldimage = $this->hasOption( 'oldimage' );

		$dbw = $this->getPrimaryDB();
		if ( $oldimage ) {
			$fieldPrefix = 'oi_';
			$queryBuilderTemplate = FileSelectQueryBuilder::newForOldFile( $dbw );
		} else {
			$fieldPrefix = 'img_';
			$queryBuilderTemplate = FileSelectQueryBuilder::newForFile( $dbw );
		}
		$nameField = $fieldPrefix . 'name';

		$upgraded = 0;
		$leftAlone = 0;
		$error = 0;

		$batchSize = intval( $this->getBatchSize() );
		if ( $batchSize <= 0 ) {
			$this->fatalError( "Batch size is too low...", 12 );
		}

		$repo = $this->newLocalRepo( $force, $brokenOnly, $reserialize, $split );
		$this->setConditions( $dbw, $queryBuilderTemplate, $fieldPrefix );
		$queryBuilderTemplate
			->orderBy( $nameField, SelectQueryBuilder::SORT_ASC )
			->limit( $batchSize );

		// The first batch honours --start (inclusive); later batches continue
		// strictly after the last name that was processed.
		// NOTE(review): paging is by name only. If the old-file table can hold
		// several rows with the same name, rows that straddle a batch boundary
		// could be skipped — confirm.
		$batchCondition = [];
		if ( $start !== false ) {
			$batchCondition[] = $dbw->expr( $nameField, '>=', $start );
		}

		do {
			$queryBuilder = clone $queryBuilderTemplate;
			$res = $queryBuilder->andWhere( $batchCondition )
				->caller( __METHOD__ )->fetchResultSet();
			$rowCount = $res->numRows();

			if ( $rowCount > 0 ) {
				$row1 = $res->current();
				$this->output( "Processing next {$rowCount} row(s) starting with {$row1->$nameField}.\n" );
				$res->rewind();
			}

			$lastName = null;
			foreach ( $res as $row ) {
				$name = $row->$nameField;
				$lastName = $name;
				try {
					// LocalFile will upgrade immediately here if obsolete
					$file = $repo->newFileFromRow( $row );
					$file->maybeUpgradeRow();
					if ( $file->getUpgraded() ) {
						// File was upgraded.
						$upgraded++;
						$this->output( "Refreshed File:{$name}.\n" );
						continue;
					}

					// The row did not need an upgrade; rewrite it anyway under --force.
					if ( $force ) {
						$file->upgradeRow();
					}
					// Counted only after the forced rewrite succeeded, so a failing
					// file lands in exactly one bucket.
					$leftAlone++;
					if ( $verbose ) {
						$this->output(
							$force
								? "Forcibly refreshed File:{$name}.\n"
								: "Skipping File:{$name}.\n"
						);
					}
				} catch ( Exception $e ) {
					// Count the failure so the summary's last figure is not always 0.
					$error++;
					$this->output( "{$name} failed. {$e->getMessage()}\n" );
				}
			}

			if ( $lastName !== null ) {
				$batchCondition = [ $dbw->expr( $nameField, '>', $lastName ) ];
			}
			$this->waitForReplication();
			// sleep() rejects negative values, so only pause for a positive --sleep.
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}
		} while ( $rowCount === $batchSize );

		// Files that threw are reported separately and are not part of $total.
		$total = $upgraded + $leftAlone;
		if ( $force ) {
			$this->output( "\nFinished refreshing file metadata for $total files. "
				. "$upgraded needed to be refreshed, $leftAlone did not need to "
				. "be but were refreshed anyways, and $error refreshes were suspicious.\n" );
		} else {
			$this->output( "\nFinished refreshing file metadata for $total files. "
				. "$upgraded were refreshed, $leftAlone were already up to date, "
				. "and $error refreshes were suspicious.\n" );
		}
	}

	/**
	 * Add the WHERE conditions for --end, --mime, --mediatype and
	 * --metadata-contains to the given query builder (modified in place).
	 *
	 * @param IReadableDatabase $dbw
	 * @param SelectQueryBuilder $queryBuilder
	 * @param string $fieldPrefix like img_ or oi_
	 */
	private function setConditions(
		IReadableDatabase $dbw,
		SelectQueryBuilder $queryBuilder,
		string $fieldPrefix
	): void {
		$end = $this->getOption( 'end', false );
		$mime = $this->getOption( 'mime', false );
		$mediatype = $this->getOption( 'mediatype', false );
		$like = $this->getOption( 'metadata-contains', false );

		if ( $end !== false ) {
			$queryBuilder->andWhere( $dbw->expr( $fieldPrefix . 'name', '<=', $end ) );
		}
		if ( $mime !== false ) {
			[ $major, $minor ] = File::splitMime( $mime );
			$queryBuilder->andWhere( [ $fieldPrefix . 'major_mime' => $major ] );
			// A minor type of '*' is the wild-card form: match on the major type only.
			if ( $minor !== '*' ) {
				$queryBuilder->andWhere( [ $fieldPrefix . 'minor_mime' => $minor ] );
			}
		}
		if ( $mediatype !== false ) {
			$queryBuilder->andWhere( [ $fieldPrefix . 'media_type' => $mediatype ] );
		}
		// Compared against false like the other options, so a search string
		// such as "0" is not silently ignored.
		if ( $like !== false ) {
			$queryBuilder->andWhere(
				$dbw->expr(
					$fieldPrefix . 'metadata',
					IExpression::LIKE,
					new LikeValue( $dbw->anyString(), $like, $dbw->anyString() )
				)
			);
		}
	}

	/**
	 * Validate the option combination and build a custom local repo whose
	 * settings reflect the requested upgrade behaviour.
	 *
	 * Exits through fatalError() when --broken-only is combined with --force,
	 * --convert-to-json or --split.
	 *
	 * @param bool $force
	 * @param bool $brokenOnly
	 * @param bool $reserialize
	 * @param bool $split
	 *
	 * @return LocalRepo
	 */
	private function newLocalRepo( bool $force, bool $brokenOnly, bool $reserialize, bool $split ): LocalRepo {
		if ( $brokenOnly && $force ) {
			$this->fatalError( 'Cannot use --broken-only and --force together. ', 2 );
		}
		// --split implies --convert-to-json.
		$reserialize = $reserialize || $split;
		if ( $brokenOnly && $reserialize ) {
			$this->fatalError( 'Cannot use --broken-only with --convert-to-json or --split. ',
				2 );
		}

		$overrides = [
			'updateCompatibleMetadata' => !$brokenOnly,
		];
		if ( $reserialize ) {
			$overrides['reserializeMetadata'] = true;
			$overrides['useJsonMetadata'] = true;
		}
		if ( $split ) {
			$overrides['useSplitMetadata'] = true;
		}

		return $this->getServiceContainer()->getRepoGroup()
			->newCustomLocalRepo( $overrides );
	}
}

// Record this script's class name in $maintClass, then include the file
// named by the RUN_MAINTENANCE_IF_MAIN constant.
// @codeCoverageIgnoreStart
$maintClass = RefreshImageMetadata::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd