Make user_ip in user_newtalk a TEXT, as it is not an IP field.
[mediawiki.git] / maintenance / dumpHTML.inc
blobca2a7df6d88be3adfd04848d285550e86389bba2
1 <?php
2 /**
3  * @addtogroup Maintenance
4  */
6 define( 'REPORTING_INTERVAL', 10 );
8 require_once( 'includes/ImagePage.php' );
9 require_once( 'includes/CategoryPage.php' );
10 require_once( 'includes/RawPage.php' );
12 class DumpHTML {
13         # Destination directory
14         var $dest;
16         # Skip existing files
17         var $noOverwrite = false;
19         # Show interlanguage links?
20         var $interwiki = true;
22         # Depth of HTML directory tree
23         var $depth = 3;
25         # Directory that commons images are copied into
26         var $sharedStaticDirectory;
28         # Directory that the images are in, after copying
29         var $destUploadDirectory;
31         # Relative path to image directory
32         var $imageRel = 'upload';
34         # Copy commons images instead of symlinking
35         var $forceCopy = false;
37         # Make a copy of all images encountered
38         var $makeSnapshot = false;
        # Don't dump shared image description pages in doEverything()
41         var $noSharedDesc = false;
43         # Make links assuming the script path is in the same directory as
44         # the destination
45         var $alternateScriptPath = false;
47         # Original values of various globals
48         var $oldArticlePath = false, $oldCopyrightIcon = false;
50         # Has setupGlobals been called?
51         var $setupDone = false;
53         # Has to compress html pages
54         var $compress = false;
56         # List of raw pages used in the current article
57         var $rawPages;
59         # Skin to use
60         var $skin = 'htmldump';
62         # Checkpoint stuff
63         var $checkpointFile = false, $checkpoints = false;
65         var $startID = 1, $endID = false;
67         var $sliceNumerator = 1, $sliceDenominator = 1;
69         # Max page ID, lazy initialised
70         var $maxPageID = false;
72         # UDP profiling
73         var $udpProfile, $udpProfileCounter = 0, $udpProfileInit = false;
75         function DumpHTML( $settings = array() ) {
76                 foreach ( $settings as $var => $value ) {
77                         $this->$var = $value;
78                 }
79         }
81         function loadCheckpoints() {
82                 if ( $this->checkpoints !== false ) {
83                         return true;
84                 } elseif ( !$this->checkpointFile ) {
85                         return false;
86                 } else {
87                         $lines = @file( $this->checkpointFile );
88                         if ( $lines === false ) {
89                                 print "Starting new checkpoint file \"{$this->checkpointFile}\"\n";
90                                 $this->checkpoints = array();
91                         } else {
92                                 $lines = array_map( 'trim', $lines );
93                                 $this->checkpoints = array();
94                                 foreach ( $lines as $line ) {
95                                         list( $name, $value ) = explode( '=', $line, 2 );
96                                         $this->checkpoints[$name] = $value;
97                                 }
98                         }
99                         return true;
100                 }
101         }
103         function getCheckpoint( $type, $defValue = false ) {
104                 if ( !$this->loadCheckpoints() ) {
105                         return false;
106                 }
107                 if ( !isset( $this->checkpoints[$type] ) ) {
108                         return false;
109                 } else {
110                         return $this->checkpoints[$type];
111                 }
112         }
114         function setCheckpoint( $type, $value ) {
115                 if ( !$this->checkpointFile ) {
116                         return;
117                 }
118                 $this->checkpoints[$type] = $value;
119                 $blob = '';
120                 foreach ( $this->checkpoints as $type => $value ) {
121                         $blob .= "$type=$value\n";
122                 }
123                 file_put_contents( $this->checkpointFile, $blob );
124         }
126         function doEverything() {
127                 if ( $this->getCheckpoint( 'everything' ) == 'done' ) {
128                         print "Checkpoint says everything is already done\n";
129                         return;
130                 }
131                 $this->doArticles();
132                 $this->doCategories();
133                 $this->doRedirects();
134                 if ( $this->sliceNumerator == 1 ) {
135                         $this->doSpecials();
136                 }
137                 $this->doLocalImageDescriptions();
139                 if ( !$this->noSharedDesc ) {
140                         $this->doSharedImageDescriptions();
141                 }
143                 $this->setCheckpoint( 'everything', 'done' );
144         }
146         /**
147          * Write a set of articles specified by start and end page_id
148          * Skip categories and images, they will be done separately
149          */
150         function doArticles() {
151                 if ( $this->endID === false ) {
152                         $end = $this->getMaxPageID();
153                 } else {
154                         $end = $this->endID;
155                 }
156                 $start = $this->startID;
157                 
158                 # Start from the checkpoint
159                 $cp = $this->getCheckpoint( 'article' );
160                 if ( $cp == 'done' ) {
161                         print "Articles already done\n";
162                         return;
163                 } elseif ( $cp !== false ) {
164                         $start = $cp;
165                         print "Resuming article dump from checkpoint at page_id $start of $end\n";
166                 } else {
167                         print "Starting from page_id $start of $end\n";
168                 }
170                 # Move the start point to the correct slice if it isn't there already
171                 $start = $this->modSliceStart( $start );
173                 $this->setupGlobals();
175                 $mainPageObj = Title::newMainPage();
176                 $mainPage = $mainPageObj->getPrefixedDBkey();
178                 for ( $id = $start, $i = 0; $id <= $end; $id += $this->sliceDenominator, $i++ ) {
179                         wfWaitForSlaves( 20 );
180                         if ( !( $i % REPORTING_INTERVAL) ) {
181                                 print "Processing ID: $id\r";
182                                 $this->setCheckpoint( 'article', $id );
183                         }
184                         if ( !($i % (REPORTING_INTERVAL*10) ) ) {
185                                 print "\n";
186                         }
187                         $title = Title::newFromID( $id );
188                         if ( $title ) {
189                                 $ns = $title->getNamespace() ;
190                                 if ( $ns != NS_CATEGORY && $ns != NS_MEDIAWIKI && 
191                                   $title->getPrefixedDBkey() != $mainPage ) {
192                                         $this->doArticle( $title );
193                                 }
194                         }
195                 }
196                 $this->setCheckpoint( 'article', 'done' );
197                 print "\n";
198         }
200         function doSpecials() {
201                 $this->doMainPage();
203                 $this->setupGlobals();
204                 print "Special:Categories...";
205                 $this->doArticle( SpecialPage::getTitleFor( 'Categories' ) );
206                 print "\n";
207         }
209         /** Write the main page as index.html */
210         function doMainPage() {
212                 print "Making index.html  ";
214                 // Set up globals with no ../../.. in the link URLs
215                 $this->setupGlobals( 0 );
217                 $title = Title::newMainPage();
218                 $text = $this->getArticleHTML( $title );
219                 
220                 # Parse the XHTML to find the images
221                 $images = $this->findImages( $text );
222                 $this->copyImages( $images );
223                 
224                 $file = fopen( "{$this->dest}/index.html", "w" );
225                 if ( !$file ) {
226                         print "\nCan't open index.html for writing\n";
227                         return false;
228                 }
229                 fwrite( $file, $text );
230                 fclose( $file );
231                 print "\n";
232         }
234         function doImageDescriptions() {
235                 $this->doLocalImageDescriptions();
236                 if ( !$this->noSharedDesc ) {
237                         $this->doSharedImageDescriptions();
238                 }
239         }
241         /**
242          * Dump image description pages that don't have an associated article, but do
243          * have a local image
244          */
245         function doLocalImageDescriptions() {
246                 $chunkSize = 1000;
248                 $dbr = wfGetDB( DB_SLAVE );
249                 
250                 $cp = $this->getCheckpoint( 'local image' );
251                 if ( $cp == 'done' ) {
252                         print "Local image descriptions already done\n";
253                         return;
254                 } elseif ( $cp !== false ) {
255                         print "Writing image description pages starting from $cp\n";
256                         $conds = array( 'img_name >= ' . $dbr->addQuotes( $cp ) );
257                 } else {
258                         print "Writing image description pages for local images\n";             
259                         $conds = false;
260                 }
262                 $this->setupGlobals();
263                 $i = 0;
265                 do {
266                         $res = $dbr->select( 'image', array( 'img_name' ), $conds, __METHOD__, 
267                                 array( 'ORDER BY' => 'img_name', 'LIMIT' => $chunkSize ) );
268                         $numRows = $dbr->numRows( $res );
270                         while ( $row = $dbr->fetchObject( $res ) ) {
271                                 # Update conds for the next chunk query
272                                 $conds = array( 'img_name > ' . $dbr->addQuotes( $row->img_name ) );
273                                 
274                                 // Slice the result set with a filter
275                                 if ( !$this->sliceFilter( $row->img_name ) ) {
276                                         continue;
277                                 }
279                                 wfWaitForSlaves( 10 );
280                                 if ( !( ++$i % REPORTING_INTERVAL ) ) {
281                                         print "{$row->img_name}\n";
282                                         if ( $row->img_name !== 'done' ) {
283                                                 $this->setCheckpoint( 'local image', $row->img_name );
284                                         }
285                                 }
286                                 $title = Title::makeTitle( NS_IMAGE, $row->img_name );
287                                 if ( $title->getArticleID() ) {
288                                         // Already done by dumpHTML
289                                         continue;
290                                 }
291                                 $this->doArticle( $title );
292                         }
293                         $dbr->freeResult( $res );
294                 } while ( $numRows );
295                 
296                 $this->setCheckpoint( 'local image', 'done' );
297                 print "\n";
298         }
300         /**
301          * Dump images which only have a real description page on commons
302          */
303         function doSharedImageDescriptions() {
304                 list( $start, $end ) = $this->sliceRange( 0, 255 );
306                 $cp = $this->getCheckpoint( 'shared image' );
307                 if ( $cp == 'done' ) {
308                         print "Shared description pages already done\n";
309                         return;
310                 } elseif ( $cp !== false ) {
311                         print "Writing description pages for commons images starting from directory $cp/255\n";
312                         $start = $cp;
313                 } else {
314                         print "Writing description pages for commons images\n";
315                 }
317                 $this->setupGlobals();
318                 $i = 0;
319                 for ( $hash = $start; $hash <= $end; $hash++ ) {
320                         $this->setCheckpoint( 'shared image', $hash );
322                         $dir = sprintf( "%s/%01x/%02x", $this->sharedStaticDirectory,
323                                 intval( $hash / 16 ), $hash );
324                         $handle = @opendir( $dir );
325                         while ( $handle && $file = readdir( $handle ) ) {
326                                 if ( $file[0] == '.' ) {
327                                         continue;
328                                 }
329                                 if ( !(++$i % REPORTING_INTERVAL ) ) {
330                                         print "$i\r";
331                                 }
333                                 $title = Title::makeTitleSafe( NS_IMAGE, $file );
334                                 $this->doArticle( $title );
335                         }
336                         if ( $handle ) {
337                                 closedir( $handle );
338                         }
339                 }
340                 $this->setCheckpoint( 'shared image', 'done' );
341                 print "\n";
342         }
344         function doCategories() {
345                 $chunkSize = 1000;
346                 
347                 $this->setupGlobals();
348                 $dbr = wfGetDB( DB_SLAVE );
349                 
350                 $cp = $this->getCheckpoint( 'category' );
351                 if ( $cp == 'done' ) {
352                         print "Category pages already done\n";
353                         return;
354                 } elseif ( $cp !== false ) {
355                         print "Resuming category page dump from $cp\n";
356                         $conds = array( 'cl_to >= ' . $dbr->addQuotes( $cp ) );
357                 } else {
358                         print "Starting category pages\n";
359                         $conds = false;
360                 }
362                 $i = 0;
363                 do {
364                         $res = $dbr->select( 'categorylinks', 'DISTINCT cl_to', $conds, __METHOD__, 
365                                 array( 'ORDER BY' => 'cl_to', 'LIMIT' => $chunkSize ) );
366                         $numRows = $dbr->numRows( $res );
367                         
368                         while ( $row = $dbr->fetchObject( $res ) ) {
369                                 // Set conditions for next chunk
370                                 $conds = array( 'cl_to > ' . $dbr->addQuotes( $row->cl_to ) );
371                                 
372                                 // Filter pages from other slices
373                                 if ( !$this->sliceFilter( $row->cl_to ) ) {
374                                         continue;
375                                 }
377                                 wfWaitForSlaves( 10 );
378                                 if ( !(++$i % REPORTING_INTERVAL ) ) {
379                                         print "{$row->cl_to}\n";
380                                         if ( $row->cl_to != 'done' ) {
381                                                 $this->setCheckpoint( 'category', $row->cl_to );
382                                         }
383                                 }
384                                 $title = Title::makeTitle( NS_CATEGORY, $row->cl_to );
385                                 $this->doArticle( $title );
386                         }
387                         $dbr->freeResult( $res );
388                 } while ( $numRows );
389                 
390                 $this->setCheckpoint( 'category', 'done' );
391                 print "\n";
392         }
394         function doRedirects() {
395                 print "Doing redirects...\n";
397                 $chunkSize = 10000;
398                 $end = $this->getMaxPageID();
399                 $cp = $this->getCheckpoint( 'redirect' );
400                 if ( $cp == 'done' )  {
401                         print "Redirects already done\n";
402                         return;
403                 } elseif ( $cp !== false ) {
404                         print "Resuming redirect generation from page_id $cp\n";
405                         $start = intval( $cp );
406                 } else {
407                         $start = 1;
408                 }
410                 $this->setupGlobals();
411                 $dbr = wfGetDB( DB_SLAVE );
412                 $i = 0;
414                 for ( $chunkStart = $start; $chunkStart <= $end; $chunkStart += $chunkSize ) {
415                         $chunkEnd = min( $end, $chunkStart + $chunkSize - 1 );
416                         $conds = array( 
417                                 'page_is_redirect' => 1,
418                                 "page_id BETWEEN $chunkStart AND $chunkEnd"
419                         );
420                         # Modulo slicing in SQL
421                         if ( $this->sliceDenominator != 1 ) {
422                                 $n = intval( $this->sliceNumerator );
423                                 $m = intval( $this->sliceDenominator );
424                                 $conds[] = "page_id % $m = $n";
425                         }
426                         $res = $dbr->select( 'page', array( 'page_id', 'page_namespace', 'page_title' ),
427                                 $conds, __METHOD__ );
428                         
429                         while ( $row = $dbr->fetchObject( $res ) ) {
430                                 $title = Title::makeTitle( $row->page_namespace, $row->page_title );
431                                 if ( !(++$i % (REPORTING_INTERVAL*10) ) ) {
432                                         printf( "Done %d redirects (%2.3f%%)\n", $i, $row->page_id / $end * 100 );
433                                         $this->setCheckpoint( 'redirect', $row->page_id );
434                                 }
435                                 $this->doArticle( $title );
436                         }
437                         $dbr->freeResult( $res );
438                 }
439                 $this->setCheckpoint( 'redirect', 'done' );
440         }
442         /** Write an article specified by title */
443         function doArticle( $title ) {
444                 if ( $this->noOverwrite ) {
445                         $fileName = $this->dest.'/'.$this->getHashedFilename( $title );
446                         if ( file_exists( $fileName ) ) {
447                                 return;
448                         }
449                 }
451                 $this->profile();
453                 $this->rawPages = array();
454                 $text = $this->getArticleHTML( $title );
456                 if ( $text === false ) {
457                         return;
458                 }
460                 # Parse the XHTML to find the images
461                 $images = $this->findImages( $text );
462                 $this->copyImages( $images );
464                 # Write to file
465                 $this->writeArticle( $title, $text );
467                 # Do raw pages
468                 wfMkdirParents( "{$this->dest}/raw", 0755 );
469                 foreach( $this->rawPages as $record ) {
470                         list( $file, $title, $params ) = $record;
472                         $path = "{$this->dest}/raw/$file";
473                         if ( !file_exists( $path ) ) {
474                                 $article = new Article( $title );
475                                 $request = new FauxRequest( $params );
476                                 $rp = new RawPage( $article, $request );
477                                 $text = $rp->getRawText();
479                                 print "Writing $file\n";
480                                 $file = fopen( $path, 'w' );
481                                 if ( !$file ) {
482                                         print("Can't open file $path for writing\n");
483                                         continue;
484                                 }
485                                 fwrite( $file, $text );
486                                 fclose( $file );
487                         }
488                 }
490                 wfIncrStats( 'dumphtml_article' );
491         }
493         /** Write the given text to the file identified by the given title object */
494         function writeArticle( $title, $text ) {
495                 $filename = $this->getHashedFilename( $title );
497                 # Temporary hack for current dump, this should be moved to 
498                 # getFriendlyName() at the earliest opportunity.
499                 #
500                 # Limit filename length to 255 characters, so it works on ext3.
501                 # Titles are in fact limited to 255 characters, but dumpHTML 
502                 # adds a suffix which may put them over the limit.
503                 $length = strlen( $filename );
504                 if ( $length > 255 ) {
505                         print "Warning: Filename too long ($length bytes). Skipping.\n";
506                         return;
507                 }
508                         
509                 $fullName = "{$this->dest}/$filename";
510                 $fullDir = dirname( $fullName );
512                 if ( $this->compress ) {
513                         $fullName .= ".gz";
514                         $text = gzencode( $text, 9 );                           
515                 }
517                 wfMkdirParents( $fullDir, 0755 );
519                 wfSuppressWarnings();
520                 $file = fopen( $fullName, 'w' );
521                 wfRestoreWarnings();
523                 if ( !$file ) {
524                         die("Can't open file '$fullName' for writing.\nCheck permissions or use another destination (-d).\n");
525                         return;
526                 }
528                 fwrite( $file, $text );
529                 fclose( $file );
530         }
532         /** Set up globals required for parsing */
533         function setupGlobals( $currentDepth = NULL ) {
534                 global $wgUser, $wgStylePath, $wgArticlePath, $wgMathPath;
535                 global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
536                 global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
537                 global $wgSharedThumbnailScriptPath, $wgEnableParserCache, $wgHooks, $wgServer;
538                 global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon, $wgEnableSidebarCache;
539                 global $wgGenerateThumbnailOnParse;
541                 static $oldLogo = NULL;
543                 if ( !$this->setupDone ) {
544                         $wgHooks['GetLocalURL'][] =& $this;
545                         $wgHooks['GetFullURL'][] =& $this;
546                         $wgHooks['SiteNoticeBefore'][] =& $this;
547                         $wgHooks['SiteNoticeAfter'][] =& $this;
548                         $this->oldArticlePath = $wgServer . $wgArticlePath;
549                 }
551                 if ( is_null( $currentDepth ) ) {
552                         $currentDepth = $this->depth;
553                 }
555                 if ( $this->alternateScriptPath ) {
556                         if ( $currentDepth == 0 ) {
557                                 $wgScriptPath = '.';
558                         } else {
559                                 $wgScriptPath = '..' . str_repeat( '/..', $currentDepth - 1 );
560                         }
561                 } else {
562                         $wgScriptPath = '..' . str_repeat( '/..', $currentDepth );
563                 }
565                 $wgArticlePath = str_repeat( '../', $currentDepth ) . '$1';
567                 # Logo image
568                 # Allow for repeated setup
569                 if ( !is_null( $oldLogo ) ) {
570                         $wgLogo = $oldLogo;
571                 } else {
572                         $oldLogo = $wgLogo;
573                 }
575                 if ( strpos( $wgLogo, $wgUploadPath ) === 0 ) {
576                         # If it's in the upload directory, rewrite it to the new upload directory
577                         $wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $wgUploadPath ) + 1 );
578                 } elseif ( $wgLogo{0} == '/' ) {
579                         # This is basically heuristic
580                         # Rewrite an absolute logo path to one relative to the the script path
581                         $wgLogo = $wgScriptPath . $wgLogo;
582                 }
584                 # Another ugly hack
585                 if ( !$this->setupDone ) {
586                         $this->oldCopyrightIcon = $wgCopyrightIcon;
587                 }
588                 $wgCopyrightIcon = str_replace( 'src="/images',
589                         'src="' . htmlspecialchars( $wgScriptPath ) . '/images', $this->oldCopyrightIcon );
591                 $wgStylePath = "$wgScriptPath/skins";
592                 $wgUploadPath = "$wgScriptPath/{$this->imageRel}";
593                 $wgSharedUploadPath = "$wgUploadPath/shared";
594                 $wgMaxCredits = -1;
595                 $wgHideInterlanguageLinks = !$this->interwiki;
596                 $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false;
597                 $wgEnableParserCache = false;
598                 $wgMathPath = "$wgScriptPath/math";
599                 $wgEnableSidebarCache = false;
600                 $wgGenerateThumbnailOnParse = true;
602                 if ( !empty( $wgRightsText ) ) {
603                         $wgRightsUrl = "$wgScriptPath/COPYING.html";
604                 }
606                 $wgUser = new User;
607                 $wgUser->setOption( 'skin', $this->skin );
608                 $wgUser->setOption( 'editsection', 0 );
610                 $this->destUploadDirectory = "{$this->dest}/{$this->imageRel}";
611                 if ( realpath( $this->destUploadDirectory ) == realpath( $wgUploadDirectory ) ) {
612                         print "Disabling image snapshot because the destination is the same as the source\n";
613                         $this->makeSnapshot = false;
614                 }
615                 $this->sharedStaticDirectory = "{$this->destUploadDirectory}/shared";
617                 $this->setupDone = true;
618         }
620         /** Reads the content of a title object, executes the skin and captures the result */
621         function getArticleHTML( $title ) {
622                 global $wgOut, $wgTitle, $wgArticle, $wgUser;
624                 $linkCache =& LinkCache::singleton();
625                 $linkCache->clear();
626                 $wgTitle = $title;
627                 if ( is_null( $wgTitle ) ) {
628                         return false;
629                 }
631                 $ns = $wgTitle->getNamespace();
632                 if ( $ns == NS_SPECIAL ) {
633                         $wgOut = new OutputPage;
634                         $wgOut->setParserOptions( new ParserOptions );
635                         SpecialPage::executePath( $wgTitle );
636                 } else {
637                         /** @todo merge with Wiki.php code */
638                         if ( $ns == NS_IMAGE ) {
639                                 $wgArticle = new ImagePage( $wgTitle );
640                         } elseif ( $ns == NS_CATEGORY ) {
641                                 $wgArticle = new CategoryPage( $wgTitle );
642                         } else {
643                                 $wgArticle = new Article( $wgTitle );
644                         }
645                         $rt = Title::newFromRedirect( $wgArticle->fetchContent() );
646                         if ( $rt != NULL ) {
647                                 return $this->getRedirect( $rt );
648                         } else {
649                                 $wgOut = new OutputPage;
650                                 $wgOut->setParserOptions( new ParserOptions );
652                                 $wgArticle->view();
653                         }
654                 }
656         
657                 $sk =& $wgUser->getSkin();
658                 ob_start();
659                 $sk->outputPage( $wgOut );
660                 $text = ob_get_contents();
661                 ob_end_clean();
663                 return $text;
664         }
666         function getRedirect( $rt ) {
667                 $url = $rt->escapeLocalURL();
668                 $text = $rt->getPrefixedText();
669                 return <<<ENDTEXT
670 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
671 <html xmlns="http://www.w3.org/1999/xhtml">
672 <head>
673   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
674   <meta http-equiv="Refresh" content="0;url=$url" />
675 </head>
676 <body>
677   <p>Redirecting to <a href="$url">$text</a></p>
678 </body>
679 </html>
680 ENDTEXT;
681         }
683         /** Returns image paths used in an XHTML document */
684         function findImages( $text ) {
685                 global $wgOutputEncoding, $wgDumpImages;
686                 $parser = xml_parser_create( $wgOutputEncoding );
687                 xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );
689                 $wgDumpImages = array();
690                 xml_parse( $parser, $text );
691                 xml_parser_free( $parser );
693                 return $wgDumpImages;
694         }
/**
 * Copy (or symlink) a file identified by a URL path into a destination tree,
 * keeping its path relative to the base.
 *
 * Nothing is done when the destination already exists. A symlink is created
 * when symlink() is available and $this->forceCopy is not set; otherwise the
 * file is copied. Failures only print a warning.
 *
 * @param string $srcPath The source URL
 * @param string $srcPathBase The base directory of the source URL
 * @param string $srcDirBase The base filesystem directory of the source URL
 * @param string $destDirBase The base filesystem directory of the destination URL
 */
function relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) {
    # Drop the base path and the slash that follows it
    $relative = substr( $srcPath, strlen( $srcPathBase ) + 1 );
    $sourceLoc = "$srcDirBase/$relative";
    $destLoc = "$destDirBase/$relative";
    #print "Copying $sourceLoc to $destLoc\n";

    if ( file_exists( $destLoc ) ) {
        return;
    }

    wfMkdirParents( dirname( $destLoc ), 0755 );

    $useSymlink = function_exists( 'symlink' ) && !$this->forceCopy;
    if ( $useSymlink ) {
        $linked = symlink( $sourceLoc, $destLoc );
        if ( !$linked ) {
            print "Warning: unable to create symlink at $destLoc\n";
        }
    } elseif ( !copy( $sourceLoc, $destLoc ) ) {
        print "Warning: unable to copy $sourceLoc to $destLoc\n";
    }
}
/**
 * Copy an image, and if it is a thumbnail, copy its parent image too.
 *
 * A path is treated as a thumbnail when the part after $srcPathBase starts
 * with "thumb/". The parent image path is rebuilt from the three path
 * components that follow "thumb/". If the path has fewer components than
 * that, the parent copy is skipped rather than built from missing pieces.
 *
 * @param string $srcPath The source URL
 * @param string $srcPathBase The base directory of the source URL
 * @param string $srcDirBase The base filesystem directory of the source URL
 * @param string $destDirBase The base filesystem directory of the destination URL
 */
function copyImage( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) {
    $this->relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase );

    if ( substr( $srcPath, strlen( $srcPathBase ) + 1, 6 ) != 'thumb/' ) {
        return;
    }

    # The image was a thumbnail
    # Copy the source image as well
    $rel = substr( $srcPath, strlen( $srcPathBase ) + 1 ); // +1 for slash
    $parts = explode( '/', $rel );
    if ( count( $parts ) < 4 ) {
        # Not enough components to name a parent image
        return;
    }

    $rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}";
    $newSrc = "$srcPathBase/$rel";
    $this->relativeCopy( $newSrc, $srcPathBase, $srcDirBase, $destDirBase );
}
739         
/**
 * Copy images (or create symlinks) into the static dump directories.
 *
 * Each key of $images is URL-decoded and matched by prefix, in this order:
 *  - $wgSharedUploadPath: copied into $this->sharedStaticDirectory
 *    (thumbnails also bring their parent image, see copyImage()).
 *    This is necessary even if you intend to distribute all of commons,
 *    because the directory contents is used to work out which image
 *    description pages are needed.
 *  - $wgMathPath: copied into "{$this->dest}/math"
 *  - $wgUploadPath: copied into $this->destUploadDirectory, but only when
 *    the makeSnapshot option is set
 *
 * @param array $images Map whose keys are URL-encoded image paths; the
 *   values are ignored
 */
function copyImages( $images ) {
    global $wgUploadPath, $wgUploadDirectory, $wgSharedUploadPath, $wgSharedUploadDirectory,
        $wgMathPath, $wgMathDirectory;

    $sharedLen = strlen( $wgSharedUploadPath );
    $mathLen = strlen( $wgMathPath );
    $uploadLen = strlen( $wgUploadPath );

    foreach ( $images as $escapedImage => $unused ) {
        $image = urldecode( $escapedImage );

        # Shared uploads go into the static directory
        if ( substr( $image, 0, $sharedLen ) == $wgSharedUploadPath ) {
            $this->copyImage( $image, $wgSharedUploadPath, $wgSharedUploadDirectory, $this->sharedStaticDirectory );
            continue;
        }

        # Math images
        if ( substr( $image, 0, $mathLen ) == $wgMathPath ) {
            $this->relativeCopy( $image, $wgMathPath, $wgMathDirectory, "{$this->dest}/math" );
            continue;
        }

        # Local uploads, only when taking a snapshot
        if ( $this->makeSnapshot && substr( $image, 0, $uploadLen ) == $wgUploadPath ) {
            $this->copyImage( $image, $wgUploadPath, $wgUploadDirectory, $this->destUploadDirectory );
        }
    }
}
/**
 * Rewrite the full URL of an interlanguage link so that it points into a
 * sibling dump directory ("../<prefix>/...").
 *
 * Only titles that are external and whose interwiki prefix is known to
 * $wgContLang->getLanguageName() are rewritten; all others are left alone.
 * Presumably registered as a GetFullURL hook handler -- confirm at the
 * registration site.
 *
 * @param Title $title Title being linked to
 * @param string $url URL, overwritten when the title is rewritten
 * @param string $query Query string (not used here)
 * @return bool false when $url was rewritten, true otherwise
 */
function onGetFullURL( &$title, &$url, $query ) {
    global $wgContLang, $wgArticlePath;

    $iw = $title->getInterwiki();
    if ( !$title->isExternal() || !$wgContLang->getLanguageName( $iw ) ) {
        return true;
    }

    # An empty DB key means the link targets the other wiki's front page
    if ( $title->getDBkey() == '' ) {
        $target = "../$iw/index.html";
    } else {
        $target = "../$iw/" . wfUrlencode( $this->getHashedFilename( $title ) );
    }
    $url = str_replace( '$1', $target, $wgArticlePath );

    if ( $this->compress ) {
        $url .= ".gz";
    }
    return false;
}
/**
 * Rewrite the local URL of a title so that it points at the dumped file.
 *
 * Interwiki titles are left to the default handling. Requests with
 * action=raw are mapped to "raw/<file>" and recorded in $this->rawPages;
 * every other title is mapped to its hashed filename. ".gz" is appended
 * when $this->compress is set.
 * Presumably registered as a GetLocalURL hook handler -- confirm at the
 * registration site.
 *
 * @param Title $title Title being linked to
 * @param string $url URL, overwritten for non-interwiki titles
 * @param string $query Query string of the link
 * @return bool true for interwiki titles (URL untouched), false otherwise
 */
function onGetLocalURL( &$title, &$url, $query ) {
    global $wgArticlePath;

    if ( $title->isExternal() ) {
        # Default is fine for interwiki
        return true;
    }

    $url = false;
    if ( $query != '' ) {
        $params = array();
        parse_str( $query, $params );
        if ( isset( $params['action'] ) && $params['action'] == 'raw' ) {
            # 'gen' is optional; reading it unguarded raised an
            # undefined-index notice for plain action=raw requests
            $gen = isset( $params['gen'] ) ? $params['gen'] : '';
            if ( $gen === 'css' || $gen === 'js' ) {
                $file = 'gen.' . $gen;
            } else {
                $file = $this->getFriendlyName( $title->getPrefixedDBkey() );
                // Clean up Monobook.css etc.
                $matches = array();
                if ( preg_match( '/^(.*)\.(css|js)_[0-9a-f]{4}$/', $file, $matches ) ) {
                    $file = $matches[1] . '.' . $matches[2];
                }
            }
            $this->rawPages[$file] = array( $file, $title, $params );
            $url = str_replace( '$1', "raw/" . wfUrlencode( $file ), $wgArticlePath );
        }
    }
    if ( $url === false ) {
        $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath );
    }
    $url .= $this->compress ? ".gz" : "";
    return false;
}
/**
 * Get the relative filename a title is written to.
 *
 * The main page maps to "index.html". Every other title maps to
 * "<hashed directory>/<friendly name>.html". For interwiki titles the bare
 * DB key is used, otherwise the prefixed DB key.
 *
 * @param Title $title
 * @return string Relative path of the HTML file
 */
function getHashedFilename( &$title ) {
    # Use the accessor, as onGetFullURL() and getHashedDirectory() do,
    # rather than reading the mInterwiki property directly
    if ( '' != $title->getInterwiki() ) {
        $dbkey = $title->getDBkey();
    } else {
        $dbkey = $title->getPrefixedDBkey();
    }

    $mainPage = Title::newMainPage();
    if ( $mainPage->getPrefixedDBkey() == $dbkey ) {
        return 'index.html';
    }

    return $this->getHashedDirectory( $title ) . '/' .
        $this->getFriendlyName( $dbkey ) . '.html';
}
/**
 * Turn a page name into a filename that is safe on Windows and mostly unique
 * on case-insensitive filesystems.
 *
 * Characters that are illegal in Windows paths (and "~") become underscores.
 * Unless the result equals the "first letter upper case, rest lower case"
 * form of the name, the first four hex digits of md5( $name ) are appended.
 * Finally ":" is replaced by "~".
 *
 * @param string $name Page name (DB key form)
 * @return string Filename without extension
 */
function getFriendlyName( $name ) {
    global $wgLang;
    # Replace illegal characters for Windows paths with underscores
    $friendlyName = strtr( $name, '/\\*?"<>|~', '_________' );

    # Work out lower case form. We assume we're on a system with case-insensitive
    # filenames, so unless the case is of a special form, we have to disambiguate
    if ( function_exists( 'mb_strtolower' ) ) {
        $lowerCase = $wgLang->ucfirst( mb_strtolower( $name ) );
    } else {
        $lowerCase = ucfirst( strtolower( $name ) );
    }

    # Make it mostly unique.
    # Strict comparison is required: with a loose one, numeric-looking names
    # that differ only in case ("1E3" vs "1e3") compare equal as numbers and
    # would get no hash, so they would collide on disk.
    if ( $lowerCase !== $friendlyName ) {
        $friendlyName .= '_' . substr( md5( $name ), 0, 4 );
    }
    # Handle colon specially by replacing it with tilde
    # Thus we reduce the number of paths with hashes appended
    $friendlyName = str_replace( ':', '~', $friendlyName );

    return $friendlyName;
}
/**
 * Get a relative directory for putting a title into.
 *
 * The directory has $this->depth levels, one per leading character of the
 * title (taken after the first colon and any underscores following it).
 * Non-ASCII bytes and the characters in the whitelist below are lower-cased
 * and used as-is; any other character is written as two upper-case hex
 * digits. Levels beyond the end of the title are filled with "_".
 *
 * @param Title $title
 * @return string Directory path with "/" separators and no trailing slash
 */
function getHashedDirectory( &$title ) {
    $pdbk = ( '' != $title->getInterwiki() )
        ? $title->getDBkey()
        : $title->getPrefixedDBkey();

    # Find the first colon if there is one, use characters after it
    $colon = strpos( $pdbk, ':' );
    if ( $colon === false ) {
        $dbk = $pdbk;
    } else {
        $afterColon = substr( $pdbk, $colon + 1 );
        # Skip any underscores directly after the colon
        $dbk = substr( $afterColon, strspn( $afterColon, '_' ) );
    }

    # Split into characters
    $matches = array();
    preg_match_all( '/./us', $dbk, $matches );
    $chars = $matches[0];
    $length = count( $chars );

    # Pick the lower-casing function once instead of on every iteration
    $lower = function_exists( 'mb_strtolower' ) ? 'mb_strtolower' : 'strtolower';

    $levels = array();
    for ( $i = 0; $i < $this->depth; $i++ ) {
        if ( $i >= $length ) {
            # Title is shorter than the tree is deep
            $levels[] = '_';
            continue;
        }

        $c = $chars[$i];
        $usable = ord( $c ) >= 128 || preg_match( '/[a-zA-Z0-9!#$%&()+,[\]^_`{}-]/', $c );
        if ( $usable ) {
            $levels[] = $lower( $c );
        } else {
            $levels[] = sprintf( "%02X", ord( $c ) );
        }
    }

    return implode( '/', $levels );
}
/**
 * Calculate the start and end of a job based on the current slice.
 *
 * The inclusive range [$start, $end] is divided into
 * $this->sliceDenominator parts; the bounds of part number
 * $this->sliceNumerator (counted from 1) are returned. The last slice
 * always ends exactly at $end.
 *
 * @param integer $start
 * @param integer $end
 * @return array of integers ( slice start, slice end )
 */
function sliceRange( $start, $end ) {
    $perSlice = ( $end - $start + 1 ) / $this->sliceDenominator;

    $sliceStart = $start + intval( $perSlice * ( $this->sliceNumerator - 1 ) );

    $isLastSlice = ( $this->sliceNumerator == $this->sliceDenominator );
    $sliceEnd = $isLastSlice
        ? $end
        : $start + intval( $perSlice * $this->sliceNumerator ) - 1;

    return array( $sliceStart, $sliceEnd );
}
/**
 * Adjust a start point so that it belongs to the current slice, where
 * slices are defined by integer modulo.
 *
 * $start is rounded down to a multiple of $this->sliceDenominator, then
 * ( $this->sliceNumerator - 1 ) and $base are added.
 *
 * @param integer $start
 * @param integer $base The true start of the range; the minimum start
 * @return integer
 */
function modSliceStart( $start, $base = 1 ) {
    $remainder = $start % $this->sliceDenominator;
    $roundedDown = $start - $remainder;
    return $roundedDown + $this->sliceNumerator - 1 + $base;
}
/**
 * Determine whether a string belongs to the current slice, based on hash.
 *
 * The CRC32 of the string, taken as an unsigned number, modulo
 * $this->sliceDenominator must equal $this->sliceNumerator - 1.
 *
 * @param string $s
 * @return bool
 */
function sliceFilter( $s ) {
    # crc32() can return a negative integer on 32-bit builds, which would
    # make the remainder negative and leave some strings in no slice at all.
    # Format it as unsigned first; values below 2^32 are exact as floats.
    $unsigned = (float)sprintf( '%u', crc32( $s ) );
    return fmod( $unsigned, $this->sliceDenominator ) == $this->sliceNumerator - 1;
}
/**
 * No site notice: blank the text and return false.
 *
 * @param string $text Notice text, replaced by an empty string
 * @return bool Always false
 */
function onSiteNoticeBefore( &$text ) {
    $blank = '';
    $text = $blank;

    return false;
}
/**
 * No site notice: blank the text and return false.
 *
 * @param string $text Notice text, replaced by an empty string
 * @return bool Always false
 */
function onSiteNoticeAfter( &$text ) {
    $blank = '';
    $text = $blank;

    return false;
}
/**
 * Get the highest page_id in the page table.
 *
 * The value is queried on first use, while $this->maxPageID is still
 * false, and stored in $this->maxPageID for later calls.
 *
 * @return mixed Result of selecting max(page_id) from the page table
 */
function getMaxPageID() {
    if ( $this->maxPageID !== false ) {
        # Already looked up
        return $this->maxPageID;
    }

    $dbr = wfGetDB( DB_SLAVE );
    $this->maxPageID = $dbr->selectField( 'page', 'max(page_id)', false, __METHOD__ );

    return $this->maxPageID;
}
/**
 * Switch the global profiler between the UDP profiler and a no-op stub.
 *
 * Does nothing unless $this->udpProfile is set. $this->udpProfileCounter
 * cycles through 0 .. udpProfile - 1, advancing by one per call:
 *  - when the counter equals 1 % udpProfile (never on the very first
 *    call), getFunctionReport() is called on the current profiler and
 *    $wgProfiler is replaced by a DumpHTML_ProfilerStub;
 *  - when the counter is 0, a ProfilerSimpleUDP with profile ID 'dumpHTML'
 *    is installed.
 */
function profile() {
    global $wgProfiler;

    $interval = $this->udpProfile;
    if ( !$interval ) {
        return;
    }

    if ( $this->udpProfileInit ) {
        # 1 % $interval is 1, or 0 when the interval itself is 1
        if ( $this->udpProfileCounter == 1 % $interval ) {
            $wgProfiler->getFunctionReport();
            $wgProfiler = new DumpHTML_ProfilerStub;
        }
    } else {
        # First call: there is nothing to report yet
        $this->udpProfileInit = true;
    }

    if ( $this->udpProfileCounter == 0 ) {
        $wgProfiler = new ProfilerSimpleUDP;
        $wgProfiler->setProfileID( 'dumpHTML' );
    }

    $this->udpProfileCounter = ( $this->udpProfileCounter + 1 ) % $interval;
}
/**
 * Profiler stand-in whose methods all do nothing.
 *
 * DumpHTML::profile() assigns an instance of this class to $wgProfiler.
 */
class DumpHTML_ProfilerStub {
    /** No-op */
    function profileIn() {}

    /** No-op */
    function profileOut() {}

    /** No-op */
    function getOutput() {}

    /** No-op */
    function close() {}

    /** No-op */
    function getFunctionReport() {}
}
/**
 * XML parser callback for opening tags.
 *
 * Records the SRC attribute of every IMG element as a key of the global
 * $wgDumpImages. Names are compared in upper case, which is what the
 * parser reports when case folding is enabled (PHP's default).
 *
 * @param resource $parser The XML parser invoking the callback (unused)
 * @param string $name Element name
 * @param array $attribs Attribute name => value
 */
function wfDumpStartTagHandler( $parser, $name, $attribs ) {
    global $wgDumpImages;

    if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) {
        $wgDumpImages[$attribs['SRC']] = true;
    }
}
/**
 * XML parser callback for closing tags.
 *
 * @param resource $parser The XML parser invoking the callback (unused)
 * @param string $name Element name (unused)
 */
function wfDumpEndTagHandler( $parser, $name ) {
    # Nothing to do when an element closes
}
1005 # vim: syn=php