updater: Move rev_sha1 addition before convertUserOptions
[mediawiki.git] / maintenance / importImages.php
blobcbbcf0f9e602aee2ce518785b272efd8f694e457
1 <?php
2 /**
3 * Import one or more images from the local file system into the wiki without
4 * using the web-based interface.
6 * "Smart import" additions:
7 * - aim: preserve the essential metadata (user, description) when importing media files from an existing wiki
8 * - process:
9 * - interface with the source wiki, don't use bare files only (see --source-wiki-url).
10 * - fetch metadata from source wiki for each file to import.
11 * - commit the fetched metadata to the destination wiki while submitting.
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License along
24 * with this program; if not, write to the Free Software Foundation, Inc.,
25 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 * http://www.gnu.org/copyleft/gpl.html
28 * @file
29 * @ingroup Maintenance
30 * @author Rob Church <robchur@gmail.com>
31 * @author Mij <mij@bitchx.it>
# Names of the command-line options that carry a value.
# NOTE(review): presumably consumed by commandLine.inc, which would explain why
# it is defined before that file is loaded — confirm.
$optionsWithArgs = array(
	'extensions',
	'comment',
	'comment-file',
	'comment-ext',
	'summary',
	'user',
	'license',
	'sleep',
	'limit',
	'from',
	'source-wiki-url',
	'timestamp',
);

require_once __DIR__ . '/commandLine.inc';
require_once __DIR__ . '/importImages.inc';

# Run counters. The import loop bumps them through a variable variable and the
# final statistics look them up by name, so the names must stay as they are.
$processed = 0;
$added = 0;
$ignored = 0;
$skipped = 0;
$overwritten = 0;
$failed = 0;

echo "Import Images\n\n";
# The directory to import from is mandatory; showUsage() prints the help text
# and terminates the script.
if ( count( $args ) < 1 ) {
	showUsage();
}

$dir = $args[0];

# Protection options: --protect and --unprotect exclude each other, and
# --protect must carry a level.
if ( isset( $options['protect'] ) ) {
	if ( isset( $options['unprotect'] ) ) {
		die( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	# NOTE(review): a value of 1 presumably means the flag was passed without
	# an argument — confirm against commandLine.inc.
	if ( $options['protect'] == 1 ) {
		die( "You must specify a protection option.\n" );
	}
}

# Allowed extensions: the --extensions list (lower-cased, comma-separated) when
# given, otherwise the wiki's configured $wgFileExtensions.
global $wgFileExtensions;
if ( isset( $options['extensions'] ) ) {
	$extensions = explode( ',', strtolower( $options['extensions'] ) );
} else {
	$extensions = $wgFileExtensions;
}

# Collect the import candidates below the given path.
$files = findFiles( $dir, $extensions, isset( $options['search-recursively'] ) );
# Resolve the account the uploads are attributed to. Anything that does not
# yield a User object falls back to the 'Maintenance script' account.
$user = User::newFromName( isset( $options['user'] ) ? $options['user'] : 'Maintenance script' );
if ( !( $user instanceof User ) ) {
	$user = User::newFromName( 'Maintenance script' );
}
$wgUser = $user;

# Block check. Disabled (false) unless --check-userblock is present; a value
# given with the option specifies how often (every N processed files) the check
# is performed, and an empty value means every file.
$checkUserBlock = false;
if ( isset( $options['check-userblock'] ) ) {
	$checkUserBlock = $options['check-userblock']
		? (int)$options['check-userblock']
		: 1;
}
# --from: name of the file at which importing starts (null when not given).
$from = isset( $options['from'] ) ? $options['from'] : null;

# --sleep: pause between files; cast to int only when a truthy value was given.
$sleep = isset( $options['sleep'] ) ? $options['sleep'] : null;
if ( $sleep ) {
	$sleep = (int)$sleep;
}

# --limit: maximum number of processed files; same casting rule as --sleep.
$limit = isset( $options['limit'] ) ? $options['limit'] : null;
if ( $limit ) {
	$limit = (int)$limit;
}

# --timestamp: upload time override, false when absent.
$timestamp = false;
if ( isset( $options['timestamp'] ) ) {
	$timestamp = $options['timestamp'];
}

# Upload comment: --comment-file wins over --comment; without either a default
# text is used. An unreadable comment file aborts the run.
if ( isset( $options['comment-file'] ) ) {
	$comment = file_get_contents( $options['comment-file'] );
	if ( $comment === null || $comment === false ) {
		die( "failed to read comment file: {$options['comment-file']}\n" );
	}
} elseif ( isset( $options['comment'] ) ) {
	$comment = $options['comment'];
} else {
	$comment = 'Importing file';
}

# --comment-ext: extension of per-file comment files, false when absent.
$commentExt = false;
if ( isset( $options['comment-ext'] ) ) {
	$commentExt = $options['comment-ext'];
}

# --summary: explicit upload summary, empty string when absent.
$summary = '';
if ( isset( $options['summary'] ) ) {
	$summary = $options['summary'];
}

# --license: license specifier, empty string when absent.
$license = '';
if ( isset( $options['license'] ) ) {
	$license = $options['license'];
}
# Batch "upload" operation.
#
# Walks over every candidate in $files, decides whether it is imported,
# overwritten, skipped or ignored, publishes it into the local file repo,
# records the upload and optionally changes the page protection. Afterwards a
# short statistics summary is printed and the script exits with status 0.
#
# The command-line array $options is only ever read here. Parameters for
# publish() live in their own $publishOptions array; reusing $options for them
# would wipe --protect, --unprotect, --overwrite, --skip-dupes and
# --source-wiki-url once the first file has been published.
global $wgRestrictionLevels;

$dryRun = isset( $options['dry'] );
$count = count( $files );

if ( $count > 0 ) {
	foreach ( $files as $file ) {
		$base = wfBaseName( $file );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !is_object( $title ) ) {
			echo "{$base} could not be imported; a valid title cannot be produced\n";
			continue;
		}

		# --from: ignore everything until the named file is reached, then
		# clear $from so that all following files are handled normally.
		if ( $from ) {
			if ( $from == $title->getDBkey() ) {
				$from = null;
			} else {
				$ignored++;
				continue;
			}
		}

		# Periodic block check, every $checkUserBlock processed files.
		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $user->isBlocked() ) {
				echo $user->getName() . " was blocked! Aborting.\n";
				break;
			}
		}

		# Check existence. $svar names the counter to bump for this file.
		$image = wfLocalFile( $title );
		if ( $image->exists() ) {
			if ( isset( $options['overwrite'] ) ) {
				echo "{$base} exists, overwriting...";
				$svar = 'overwritten';
			} else {
				echo "{$base} exists, skipping\n";
				$skipped++;
				continue;
			}
		} else {
			if ( isset( $options['skip-dupes'] ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = File::sha1Base36( $file );

				$dupes = $repo->findBySha1( $sha1 );

				if ( $dupes ) {
					echo "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n";
					$skipped++;
					continue;
				}
			}

			echo "Importing {$base}...";
			$svar = 'added';
		}

		if ( isset( $options['source-wiki-url'] ) ) {
			# Find comment text directly from source wiki, through MW's API;
			# false means "not available", so the global comment is used.
			$real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
			$commentText = ( $real_comment === false ) ? $comment : $real_comment;

			# Find user directly from source wiki, through MW's API
			$real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
			if ( $real_user === false ) {
				$wgUser = $user;
			} else {
				# Resolve into a temporary first so that $wgUser is never
				# left holding false when the lookup fails.
				$sourceUser = User::newFromName( $real_user );
				if ( $sourceUser === false ) {
					# user does not exist in target wiki
					echo "failed: user '$real_user' does not exist in target wiki.\n";
					$failed++;
					continue;
				}
				$wgUser = $sourceUser;
			}
		} else {
			# Find comment text: a per-file comment file (--comment-ext) wins,
			# the global comment is the fallback.
			$commentText = false;

			if ( $commentExt ) {
				$f = findAuxFile( $file, $commentExt );
				if ( !$f ) {
					echo " No comment file with extension {$commentExt} found for {$file}, using default comment. ";
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						echo " Failed to load comment file {$f}, using default comment. ";
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( $dryRun ) {
			echo " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... ";
		} else {
			$props = FSFile::getPropsFromPath( $file );
			$flags = 0;
			$handler = MediaHandler::getHandler( $props['mime'] );
			$publishOptions = array(
				'headers' => $handler ? $handler->getStreamHeaders( $props['metadata'] ) : array(),
			);
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				echo "failed. (" .
					$archive->getWikiText() .
					")\n";
				$failed++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );

		# Per-file summary: the --summary text when given, otherwise this
		# file's own description. $summary itself is left untouched so that
		# one file's description never becomes the summary of the next file.
		$uploadSummary = $summary ? $summary : $commentText;

		if ( $dryRun ) {
			echo "done.\n";
		} elseif ( $image->recordUpload2( $archive->value, $uploadSummary, $commentText, $props, $timestamp ) ) {
			# We're done!
			echo "done.\n";

			# Decide whether the protection has to change: --protect with a
			# level known to the wiki sets that level, --unprotect clears it.
			$doProtect = false;
			$protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;

			if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels, true ) ) {
				$doProtect = true;
			}
			if ( isset( $options['unprotect'] ) ) {
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				echo "\nWaiting for slaves...\n";
				// Wait for slaves.
				sleep( 2 ); # Why this sleep?
				wfWaitForSlaves();

				echo "\nSetting image restrictions ... ";

				# Apply the same level to every restriction type of the page.
				$cascade = false;
				$restrictions = array();
				foreach ( $title->getRestrictionTypes() as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = WikiPage::factory( $title );
				$status = $page->doUpdateRestrictions( $restrictions, array(), $cascade, '', $user );
				echo ( $status->isOK() ? 'done' : 'failed' ) . "\n";
			}
		} else {
			echo "failed. (at recordUpload stage)\n";
			$svar = 'failed';
		}

		# Bump the counter selected above ($added, $overwritten or $failed).
		$$svar++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}

		if ( $sleep ) {
			sleep( $sleep );
		}
	}

	# Print out some statistics; the keys are the names of the variables
	# holding the values, and only positive values are reported.
	echo "\n";
	$statLabels = array(
		'count' => 'Found',
		'limit' => 'Limit',
		'ignored' => 'Ignored',
		'added' => 'Added',
		'skipped' => 'Skipped',
		'overwritten' => 'Overwritten',
		'failed' => 'Failed',
	);
	foreach ( $statLabels as $var => $desc ) {
		if ( $$var > 0 ) {
			echo "{$desc}: {$$var}\n";
		}
	}
} else {
	echo "No suitable files could be found for import.\n";
}

exit( 0 );
/**
 * Print the command-line help to standard output and terminate the script
 * with exit status 1.
 *
 * @param string|bool $reason Optional message printed on its own line before
 *   the help text; nothing extra is printed when it is falsy.
 */
function showUsage( $reason = false ) {
	if ( $reason ) {
		echo $reason . "\n";
	}

	// The closing marker stays in column 0 so the heredoc also parses on the
	// older PHP versions this script is written for.
	echo <<<TEXT
Imports images and other media files into the wiki
USAGE: php importImages.php [options] <dir>

<dir> : Path to the directory containing images to be imported

Options:
--extensions=<exts>       Comma-separated list of allowable extensions, defaults to \$wgFileExtensions
--overwrite               Overwrite existing images with the same name (default is to skip them)
--limit=<num>             Limit the number of images to process. Ignored or skipped images are not counted.
--from=<name>             Ignore all files until the one with the given name. Useful for resuming
                          aborted imports. <name> should be the file's canonical database form.
--skip-dupes              Skip images that were already uploaded under a different name (check SHA1)
--search-recursively      Search recursively for files in subdirectories
--sleep=<sec>             Sleep between files. Useful mostly for debugging.
--user=<username>         Set username of uploader, default 'Maintenance script'
--check-userblock[=<n>]   Check if the user got blocked during import. With <n>, check only
                          every <n> processed files (default is every file).
--comment=<text>          Set file description, default 'Importing file'.
--comment-file=<file>     Set description to the content of <file>.
--comment-ext=<ext>       Causes the description for each file to be loaded from a file with the same name
                          but the extension <ext>. If that file is missing or empty, the global
                          description is used instead.
--license=<code>          Use an optional license template
--dry                     Dry run, don't import anything
--protect=<protect>       Specify the protect value (autoconfirmed,sysop)
--summary=<summary>       Upload summary, description will be used if not provided
--timestamp=<timestamp>   Override upload time/date, all MediaWiki timestamp formats are accepted
--unprotect               Unprotects all uploaded images
--source-wiki-url=<url>   If specified, take User and Comment data for each imported file from this URL.
                          For example, --source-wiki-url="http://en.wikipedia.org/"

TEXT;
	exit( 1 );
}