3 * Refresh image metadata fields. See also rebuildImages.php
5 * Usage: php refreshImageMetadata.php
7 * Copyright © 2011 Brian Wolff
8 * https://www.mediawiki.org/
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 * http://www.gnu.org/copyleft/gpl.html
27 * @ingroup Maintenance
30 require_once __DIR__
. '/Maintenance.php';
32 use MediaWiki\FileRepo\File\FileSelectQueryBuilder
;
33 use Wikimedia\Rdbms\IMaintainableDatabase
;
34 use Wikimedia\Rdbms\IReadableDatabase
;
35 use Wikimedia\Rdbms\SelectQueryBuilder
;
38 * Maintenance script to refresh image metadata fields.
40 * @ingroup Maintenance
42 class RefreshImageMetadata
extends Maintenance
{
45 * @var IMaintainableDatabase
// Constructor: calls the parent constructor, sets the script description,
// sets a default batch size of 200 and registers the command-line options.
49 public function __construct() {
50 parent
::__construct();
52 $this->addDescription( 'Script to update image metadata records' );
53 $this->setBatchSize( 200 );
// NOTE(review): the embedded numbering skips lines from here on (53 to 57, 57 to 64, ...),
// so the calls that own the description strings below are not visible in this listing.
// Presumably each string is the description argument of an addOption() call for one of
// the options read elsewhere in this class (force, broken-only, convert-to-json, split,
// verbose, mediatype, mime, metadata-contains, sleep) - confirm against the canonical file.
57 'Reload metadata from file even if the metadata looks ok',
64 'Only fix really broken records, leave old but still compatible records alone.'
68 'Fix records with an out of date serialization format.'
72 'Enable splitting out large metadata items to the text table. Implies --convert-to-json.'
76 'Output extra information about each upgraded/non-upgraded file.',
// start and end bound the range of file names to process: execute() turns start into a
// name >= condition and setConditions() turns end into a name <= condition.
81 $this->addOption( 'start', 'Name of file to start with', false, true );
82 $this->addOption( 'end', 'Name of file to end with', false, true );
86 'Only refresh files with this media type, e.g. BITMAP, UNKNOWN etc.',
92 "Only refresh files with this MIME type. Can accept wild-card 'image/*'. "
93 . "Potentially inefficient unless 'mediatype' is also specified",
99 '(Inefficient!) Only refresh files where the img_metadata field '
100 . 'contains this string. Can be used if its known a specific '
101 . 'property was being extracted incorrectly.',
107 'Time to sleep between each batch (in seconds). Default: 0',
// oldimage is registered with a name and description only; execute() reads it with hasOption().
111 $this->addOption( 'oldimage', 'Run and refresh on oldimage table.' );
// Entry point of the script. Reads the command-line options, builds a file query ordered
// by name, walks the matching rows in batches of the configured batch size, asks each
// file object to upgrade its row if needed, and finally prints a summary line.
// NOTE(review): this listing carries embedded line numbers and skips several of them, so
// some statements of this method (block openers/closers, counter updates) are not visible.
114 public function execute() {
115 $force = $this->hasOption( 'force' );
116 $brokenOnly = $this->hasOption( 'broken-only' );
117 $verbose = $this->hasOption( 'verbose' );
118 $start = $this->getOption( 'start', false );
119 $split = $this->hasOption( 'split' );
// The sleep value is cast to int and defaults to 0 when the option is absent.
120 $sleep = (int)$this->getOption( 'sleep', 0 );
121 $reserialize = $this->hasOption( 'convert-to-json' );
122 $oldimage = $this->hasOption( 'oldimage' );
124 $dbw = $this->getDB( DB_PRIMARY
);
// Two prefix/query-builder pairs follow: oi_ with newForOldFile() and img_ with newForFile().
// NOTE(review): the branch that picks one of them is not visible here (embedded numbers 125
// and 128 are skipped); presumably it tests $oldimage - confirm.
126 $fieldPrefix = 'oi_';
127 $queryBuilderTemplate = FileSelectQueryBuilder
::newForOldFile( $dbw );
129 $fieldPrefix = 'img_';
130 $queryBuilderTemplate = FileSelectQueryBuilder
::newForFile( $dbw );
// A batch size of zero or less stops the script through fatalError(), with 12 as its
// second argument.
136 $batchSize = intval( $this->getBatchSize() );
137 if ( $batchSize <= 0 ) {
138 $this->fatalError( "Batch size is too low...", 12 );
// The repo is built from the option flags; the option-based filters are added to the
// template once, before any batch runs.
140 $repo = $this->newLocalRepo( $force, $brokenOnly, $reserialize, $split );
141 $this->setConditions( $dbw, $queryBuilderTemplate, $fieldPrefix );
// Rows are ordered by file name ascending and capped at the batch size, which lets each
// following batch resume from the last name seen.
142 $queryBuilderTemplate
143 ->orderBy( $fieldPrefix . 'name', SelectQueryBuilder
::SORT_ASC
)
144 ->limit( $batchSize );
146 $batchCondition = [];
147 // For the WHERE img_name > 'foo' condition that comes after doing a batch
// The first batch uses an inclusive lower bound when a start name was given.
148 if ( $start !== false ) {
149 $batchCondition[] = $fieldPrefix . 'name >= ' . $dbw->addQuotes( $start );
// Each batch works on a clone of the template, so the batch condition is applied to a
// fresh copy of the builder.
152 $queryBuilder = clone $queryBuilderTemplate;
153 $res = $queryBuilder->andWhere( $batchCondition )
154 ->caller( __METHOD__
)->fetchResultSet();
155 $nameField = $fieldPrefix . 'name';
// Progress line naming the first row of the batch; only printed for non-empty results.
156 if ( $res->numRows() > 0 ) {
157 $row1 = $res->current();
158 $this->output( "Processing next {$res->numRows()} row(s) starting with {$row1->$nameField}.\n" );
// Per-row work: build the file object from the row, let it upgrade the row, then report
// the outcome. NOTE(review): the statements between the three output calls below
// (embedded numbers 169, 171-175, 177-179) are not visible; presumably they update the
// counters and branch on $force and $verbose - confirm.
162 foreach ( $res as $row ) {
164 // LocalFile will upgrade immediately here if obsolete
165 $file = $repo->newFileFromRow( $row );
166 $file->maybeUpgradeRow();
167 if ( $file->getUpgraded() ) {
168 // File was upgraded.
170 $this->output( "Refreshed File:{$row->$nameField}.\n" );
176 $this->output( "Forcibly refreshed File:{$row->$nameField}.\n" );
180 $this->output( "Skipping File:{$row->$nameField}.\n" );
// An exception for one row is reported with its message; the catch body shown here does
// not rethrow.
184 } catch ( Exception
$e ) {
185 $this->output( "{$row->$nameField} failed. {$e->getMessage()}\n" );
// The next batch resumes strictly after the last file name handled in this batch.
188 if ( $res->numRows() > 0 ) {
189 // @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
190 $batchCondition = [ $fieldPrefix . 'name > ' . $dbw->addQuotes( $row->$nameField ) ];
192 $this->waitForReplication();
// NOTE(review): $sleep is read above but its use is not visible (embedded numbers 193-195
// are skipped) - confirm.
// The loop continues only while a batch came back full; a shorter batch ends it.
196 } while ( $res->numRows() === $batchSize );
// NOTE(review): $upgraded, $leftAlone and $error are read below, but their initialisation
// and increments fall on skipped lines of this listing - confirm against the canonical file.
198 $total = $upgraded +
$leftAlone;
// Two alternative summary messages follow. The condition choosing between them is not
// visible; the first wording (refreshed anyways) presumably belongs to the force mode.
200 $this->output( "\nFinished refreshing file metadata for $total files. "
201 . "$upgraded needed to be refreshed, $leftAlone did not need to "
202 . "be but were refreshed anyways, and $error refreshes were suspicious.\n" );
204 $this->output( "\nFinished refreshing file metadata for $total files. "
205 . "$upgraded were refreshed, $leftAlone were already up to date, "
206 . "and $error refreshes were suspicious.\n" );
211 * @param IReadableDatabase $dbw
212 * @param SelectQueryBuilder $queryBuilder
213 * @param string $fieldPrefix like img_ or oi_
// Adds the option-driven WHERE conditions (end, mime, mediatype, metadata-contains) to the
// query builder that is passed in. The builder is modified through andWhere(); no return
// statement is visible in this listing.
216 private function setConditions( IReadableDatabase
$dbw, SelectQueryBuilder
$queryBuilder, $fieldPrefix ) {
// Every option defaults to false, which the checks below treat as not supplied.
217 $end = $this->getOption( 'end', false );
218 $mime = $this->getOption( 'mime', false );
219 $mediatype = $this->getOption( 'mediatype', false );
220 $like = $this->getOption( 'metadata-contains', false );
// Inclusive upper bound on the file name.
222 if ( $end !== false ) {
223 $queryBuilder->andWhere( $fieldPrefix . 'name <= ' . $dbw->addQuotes( $end ) );
// The MIME option is split into major and minor parts. The major part is always
// constrained; the minor part only when it is not the * wildcard.
225 if ( $mime !== false ) {
226 [ $major, $minor ] = File
::splitMime( $mime );
227 $queryBuilder->andWhere( [ $fieldPrefix . 'major_mime' => $major ] );
228 if ( $minor !== '*' ) {
229 $queryBuilder->andWhere( [ $fieldPrefix . 'minor_mime' => $minor ] );
232 if ( $mediatype !== false ) {
233 $queryBuilder->andWhere( [ $fieldPrefix . 'media_type' => $mediatype ] );
// Substring match on the metadata column: anyString() is placed on both sides of $like.
// NOTE(review): the guard for this condition is not visible (embedded number 235 is
// skipped); presumably it checks that $like was supplied - confirm.
236 $queryBuilder->andWhere(
237 $fieldPrefix . 'metadata ' . $dbw->buildLike( $dbw->anyString(), $like, $dbw->anyString() )
244 * @param bool $brokenOnly
245 * @param bool $reserialize
// Validates the combination of mode flags and returns a LocalRepo created through
// newCustomLocalRepo() with metadata-handling overrides derived from those flags.
250 private function newLocalRepo( $force, $brokenOnly, $reserialize, $split ): LocalRepo
{
// broken-only cannot be combined with force; fatalError() gets 2 as its second argument.
251 if ( $brokenOnly && $force ) {
252 $this->fatalError( 'Cannot use --broken-only and --force together. ', 2 );
// Requesting split also turns on reserialization, so the checks below cover both flags.
254 $reserialize = $reserialize ||
$split;
// NOTE(review): the remaining arguments of this fatalError() call and the opening of the
// $overrides array fall on skipped lines (embedded numbers 257-260) - confirm.
255 if ( $brokenOnly && $reserialize ) {
256 $this->fatalError( 'Cannot use --broken-only with --convert-to-json or --split. ',
// Compatible-but-old metadata is updated unless broken-only was requested.
261 'updateCompatibleMetadata' => !$brokenOnly,
263 if ( $reserialize ) {
264 $overrides['reserializeMetadata'] = true;
265 $overrides['useJsonMetadata'] = true;
// NOTE(review): the guard for the next override is not visible (embedded number 267 is
// skipped); presumably it tests $split - confirm.
268 $overrides['useSplitMetadata'] = true;
271 return $this->getServiceContainer()->getRepoGroup()
272 ->newCustomLocalRepo( $overrides );
// Script bootstrap: stores the class name of this script in $maintClass, then includes the
// file named by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): that constant is defined outside this listing; presumably the included
// file runs $maintClass when this script is the entry point - confirm.
276 $maintClass = RefreshImageMetadata
::class;
277 require_once RUN_MAINTENANCE_IF_MAIN
;