4 * @subpackage Maintenance
7 define( 'REPORTING_INTERVAL', 10 );
9 require_once( 'includes/ImagePage.php' );
10 require_once( 'includes/CategoryPage.php' );
13 # Destination directory
# NOTE(review): the declaration this comment belongs to is not visible in this listing;
# the methods below read the value as $this->dest.
16 # Show interlanguage links?
# setupGlobals() negates this flag to derive the "hide interlanguage links" setting.
17 var $interwiki = true;
19 # Depth of HTML directory tree
# NOTE(review): the declaration is not visible in this listing; setupGlobals() reads
# the value as $this->depth when it is called without an explicit depth.
22 # Directory that commons images are copied into
# Assigned by setupGlobals() to "$wgUploadDirectory/shared".
23 var $sharedStaticPath;
25 # Relative path to image directory
26 var $imageRel = 'upload';
28 # Copy commons images instead of symlinking
29 var $forceCopy = false;
31 # Make links assuming the script path is in the same directory as
# NOTE(review): the sentence above is cut off in this listing. When this flag is set
# and the depth is non-zero, setupGlobals() builds the script path with one fewer
# "/.." level than it does otherwise.
33 var $alternateScriptPath = false;
35 function DumpHTML( $settings ) {
# Constructor in the PHP 4 style (method named after the class; the class name is
# taken from the 'DumpHTML::' prefixes used for $fname elsewhere in this file).
# $settings is iterated as name => value pairs.
# NOTE(review): the loop body is not visible in this listing; presumably each pair
# is copied onto the property of the same name -- confirm.
36 foreach ( $settings as $var => $value ) {
42 * Write a set of articles specified by start and end page_id
43 * Skip categories, they will be done separately. Pages in other namespaces, including image pages that have an article, are written here
45 function doArticles( $start, $end = false ) {
# Dumps the pages whose page_id lies between $start and $end (both inclusive),
# leaving out pages in the category namespace.
#   $start  first page_id to process
#   $end    last page_id to process; false (the default) means "use the highest
#           page_id currently in the page table"
46 $fname = 'DumpHTML::doArticles';
48 $this->setupGlobals();
50 if ( $end === false ) {
# No upper bound supplied: look up max(page_id) on the DB_SLAVE connection.
51 $dbr =& wfGetDB( DB_SLAVE );
52 $end = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
56 for ($id = $start; $id <= $end; $id++) {
# Progress report every REPORTING_INTERVAL ids; "\r" keeps rewriting one console line.
57 if ( !($id % REPORTING_INTERVAL) ) {
58 print "Processing ID: $id\r";
60 $title = Title::newFromID( $id );
# NOTE(review): every integer in the range is tried, so some ids may not resolve to a
# title. A guard for that case is not visible in this listing -- confirm one precedes
# the method call below.
62 $ns = $title->getNamespace() ;
# Only the category namespace is excluded at this point.
63 if ( $ns != NS_CATEGORY ) {
64 $this->doArticle( $title );
71 function doSpecials() {
# Dumps special pages. The only one visible in this listing is Special:Categories.
74 $this->setupGlobals();
75 print "Special:Categories...";
# The title is built directly in the special namespace and then goes through the
# same doArticle() pipeline as ordinary pages.
76 $this->doArticle( Title::makeTitle( NS_SPECIAL, 'Categories' ) );
80 /** Write the main page as index.html */
81 function doMainPage() {
# Renders the wiki's main page and writes it to index.html directly inside the
# destination directory ($this->dest).
82 global $wgMakeDumpLinks;
84 print "Making index.html ";
86 // Set up globals with no ../../.. in the link URLs
87 $this->setupGlobals( 0 );
89 // But still use that directory style
# NOTE(review): the statement that the comment above introduces is not visible in
# this listing.
92 $title = Title::newMainPage();
93 $text = $this->getArticleHTML( $title );
94 $file = fopen( "{$this->dest}/index.html", "w" );
# NOTE(review): presumably printed only when fopen() failed; the surrounding check
# is not visible in this listing.
96 print "\nCan't open index.html for writing\n";
99 fwrite( $file, $text );
104 function doImageDescriptions() {
# Writes image description pages in two passes:
#   1. every row of the local image table whose title has no article id
#      (titles with an article id are skipped as "already done");
#   2. every file found under the shared static directory ($this->sharedStaticPath),
#      i.e. images that were copied or symlinked there by copyImages().
# Takes no parameters and returns nothing; progress is printed to stdout.
105 global $wgSharedUploadDirectory;
107 $fname = 'DumpHTML::doImageDescriptions';
# Link depth is fixed at 3 for this pass rather than taken from $this->depth.
109 $this->setupGlobals( 3 );
112 * Dump image description pages that don't have an associated article, but do
115 $dbr =& wfGetDB( DB_SLAVE );
116 extract( $dbr->tableNames( 'image', 'page' ) );
117 $res = $dbr->select( 'image', array( 'img_name' ), false, $fname );
120 print "Writing image description pages for local images\n";
121 $num = $dbr->numRows( $res );
122 while ( $row = $dbr->fetchObject( $res ) ) {
# Progress report every REPORTING_INTERVAL rows.
123 if ( !( ++$i % REPORTING_INTERVAL ) ) {
124 print "Done $i of $num\r";
126 $title = Title::makeTitle( NS_IMAGE, $row->img_name );
127 if ( $title->getArticleID() ) {
128 // Already done by dumpHTML
131 $this->doArticle( $title );
136 * Dump images which only have a real description page on commons
138 print "Writing description pages for commons images\n";
# Walk the 256 two-level hash directories "0/00" .. "f/ff".
140 for ( $hash = 0; $hash < 256; $hash++ ) {
141 $dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );
142 $paths = glob( "{$this->sharedStaticPath}/$dir/*" );
# The two glob() results are numerically indexed lists. The array union operator (+)
# keeps the left-hand entry for every index both lists share, so it silently dropped
# thumbnail entries. Merge the lists instead, and reduce them to unique file names so
# that an image present both as a full-size file and as a thumbnail is written once.
# From here on $paths holds base names; basename() below is a no-op on them.
# glob() may return false on error; as before, that case is not handled here.
143 $paths = array_unique( array_map( 'basename', array_merge( $paths, glob( "{$this->sharedStaticPath}/thumb/$dir/*" ) ) ) );
145 foreach ( $paths as $path ) {
146 $file = basename( $path );
147 if ( !(++$i % REPORTING_INTERVAL ) ) {
151 $title = Title::makeTitle( NS_IMAGE, $file );
152 $this->doArticle( $title );
158 function doCategories() {
# Writes one category page for every distinct cl_to value found in the
# categorylinks table. Takes no parameters and returns nothing; progress is
# printed to stdout.
159 $fname = 'DumpHTML::doCategories';
160 $this->setupGlobals();
162 $dbr =& wfGetDB( DB_SLAVE );
163 $categorylinks = $dbr->tableName( 'categorylinks' );
164 print "Selecting categories...";
# Query the table under the name resolved by tableName() above. The statement
# previously hard-coded the literal "categorylinks", which left $categorylinks
# unused and bypassed whatever name resolution tableName() performs.
165 $sql = "SELECT DISTINCT cl_to FROM $categorylinks";
166 $res = $dbr->query( $sql, $fname );
168 print "\nWriting " . $dbr->numRows( $res ). " category pages\n";
170 while ( $row = $dbr->fetchObject( $res ) ) {
# Progress report every REPORTING_INTERVAL rows.
171 if ( !(++$i % REPORTING_INTERVAL ) ) {
# cl_to is turned into a title in the category namespace and dumped like any page.
174 $title = Title::makeTitle( NS_CATEGORY, $row->cl_to );
175 $this->doArticle( $title );
181 /** Write an article specified by title */
182 function doArticle( $title ) {
# Dumps a single page: render it to HTML, collect the images the HTML refers to,
# copy/symlink those images into the static directory, then write the HTML to disk.
#   $title  title object of the page to dump
183 global $wgTitle, $wgSharedUploadPath, $wgSharedUploadDirectory;
184 global $wgUploadDirectory;
186 $text = $this->getArticleHTML( $title );
# A strict false is treated as a distinct outcome of getArticleHTML().
# NOTE(review): what happens in that case is not visible in this listing.
187 if ( $text === false ) {
191 # Parse the XHTML to find the images
192 $images = $this->findImages( $text );
193 $this->copyImages( $images );
196 $this->writeArticle( $title, $text );
199 /** Write the given text to the file identified by the given title object */
200 function writeArticle( &$title, $text ) {
#   $title  title object (taken by reference); supplies the relative file name
#   $text   content written to that file
# The target path is "<dest>/<hashed filename>".
201 $filename = $title->getHashedFilename();
202 $fullName = "{$this->dest}/$filename";
203 $fullDir = dirname( $fullName );
# Make sure the parent directory chain exists before opening the file.
205 wfMkdirParents( $fullDir, 0755 );
# Mode 'w' truncates an existing file, so a re-run overwrites earlier output.
207 $file = fopen( $fullName, 'w' );
# NOTE(review): presumably printed only when fopen() failed; the surrounding check
# is not visible in this listing.
209 print("Can't open file $fullName for writing\n");
213 fwrite( $file, $text );
217 /** Set up globals required for parsing */
218 function setupGlobals( $depth = NULL ) {
# Rewrites the path-related globals so that generated pages link to each other, to
# the skins and to uploaded files with relative URLs suitable for a static dump.
#   $depth  number of directory levels the page being written sits below the dump
#           root; NULL (the default) means "use $this->depth"
# Returns nothing; the result is the modified global state plus
# $this->sharedStaticPath.
219 global $wgUser, $wgTitle, $wgMakeDumpLinks, $wgStylePath, $wgArticlePath;
220 global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
221 global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
222 global $wgSharedThumbnailScriptPath, $wgEnableParserCache;
# Kept across calls so a repeated setup can refer back to an earlier logo value.
224 static $oldLogo = NULL;
226 if ( is_null( $depth ) ) {
227 $wgMakeDumpLinks = $this->depth;
229 $wgMakeDumpLinks = $depth;
# NOTE(review): $wgScriptPath does not appear in the global declarations visible
# above, so the assignments below create a function-local variable -- confirm that
# is intended.
232 if ( $this->alternateScriptPath ) {
233 if ( $wgMakeDumpLinks == 0 ) {
# Alternate layout: one "/.." fewer than the default branch below.
236 $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks - 1 );
239 $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks );
# Article links climb $wgMakeDumpLinks levels; $1 is the placeholder for the page.
242 $wgArticlePath = str_repeat( '../', $wgMakeDumpLinks ) . '$1';
245 # Allow for repeated setup
246 if ( !is_null( $oldLogo ) ) {
252 if ( strpos( $wgLogo, $wgUploadPath ) === 0 ) {
253 # If it's in the upload directory, rewrite it to the new upload directory
254 $wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $wgUploadPath ) + 1 );
# First-character test written with substr() instead of the $wgLogo{0} offset form:
# the curly-brace syntax was removed in PHP 8, and substr() is also quiet when
# $wgLogo is an empty string.
255 } elseif ( substr( $wgLogo, 0, 1 ) == '/' ) {
256 # This is basically heuristic
257 # Rewrite an absolute logo path to one relative to the the script path
258 $wgLogo = $wgScriptPath . $wgLogo;
261 $wgStylePath = "$wgScriptPath/skins";
262 $wgUploadPath = "$wgScriptPath/{$this->imageRel}";
263 $wgSharedUploadPath = "$wgUploadPath/shared";
# The variable name was misspelled ("Interlangage"), so the value landed in a
# throw-away local instead of the global declared at the top of this function.
265 $wgHideInterlanguageLinks = !$this->interwiki;
266 $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false;
267 $wgEnableParserCache = false;
# Render with the dump skin and without section edit links.
270 $wgUser->setOption( 'skin', 'htmldump' );
271 $wgUser->setOption( 'editsection', 0 );
273 $this->sharedStaticPath = "$wgUploadDirectory/shared";
277 /** Reads the content of a title object, executes the skin and captures the result */
278 function getArticleHTML( &$title ) {
#   $title  title object (taken by reference) of the page to render
# Produces the page's HTML by running the user's skin over a fresh OutputPage and
# reading the result back from the output buffer.
# NOTE(review): the lines that assign $wgTitle, start and end output buffering and
# return the text are not visible in this listing.
279 global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic;
# A new OutputPage with default parser options for every page.
281 $wgOut = new OutputPage;
282 $wgOut->setParserOptions( new ParserOptions );
# NOTE(review): the body of this check is not visible; doArticle() tests the return
# value against false, so this is presumably where false is returned -- confirm.
285 if ( is_null( $wgTitle ) ) {
289 $ns = $wgTitle->getNamespace();
# Special pages are executed directly; every other namespace gets an article object
# whose class depends on the namespace.
290 if ( $ns == NS_SPECIAL ) {
291 SpecialPage::executePath( $wgTitle );
293 if ( $ns == NS_IMAGE ) {
294 $wgArticle = new ImagePage( $wgTitle );
# Category pages get their own class only when $wgUseCategoryMagic is enabled.
295 } elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) {
296 $wgArticle = new CategoryPage( $wgTitle );
298 $wgArticle = new Article( $wgTitle );
303 $sk =& $wgUser->getSkin();
305 $sk->outputPage( $wgOut );
# Reads whatever is currently in the output buffer.
306 $text = ob_get_contents();
312 /** Returns image paths used in an XHTML document */
313 function findImages( $text ) {
#   $text  XHTML document to scan
# Returns the $wgDumpImages array: its keys are the SRC values of the IMG elements
# encountered, its values are true (see wfDumpStartTagHandler()).
314 global $wgOutputEncoding, $wgDumpImages;
315 $parser = xml_parser_create( $wgOutputEncoding );
# The handlers are plain functions that report back through the $wgDumpImages global.
316 xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );
# Start from an empty result for each document.
318 $wgDumpImages = array();
# NOTE(review): the return value of xml_parse() is not checked here, so a document
# the parser rejects would simply yield the images seen up to that point.
319 xml_parse( $parser, $text );
320 xml_parser_free( $parser );
322 return $wgDumpImages;
326 * Copy images (or create symlinks) from commons to a static directory.
327 * This is necessary even if you intend to distribute all of commons, because
328 * the directory contents is used to work out which image description pages
331 function copyImages( $images ) {
#   $images  array whose KEYS are image URLs (the values are ignored), in the form
#            returned by findImages()
332 global $wgSharedUploadPath, $wgSharedUploadDirectory;
333 # Find shared uploads and copy them into the static directory
334 $sharedPathLength = strlen( $wgSharedUploadPath );
335 foreach ( $images as $image => $dummy ) {
# Only URLs that begin with the shared upload path are handled.
337 if ( substr( $image, 0, $sharedPathLength ) == $wgSharedUploadPath ) {
338 # Reconstruct full filename
339 $rel = substr( $image, $sharedPathLength + 1 ); // +1 for slash
340 $sourceLoc = "$wgSharedUploadDirectory/$rel";
341 $staticLoc = "{$this->sharedStaticPath}/$rel";
342 #print "Copying $sourceLoc to $staticLoc\n";
343 # Copy to static directory
# Nothing is done for a target that already exists.
344 if ( !file_exists( $staticLoc ) ) {
345 wfMkdirParents( dirname( $staticLoc ), 0755 );
# Symlink when symlink() is available and $this->forceCopy is off.
346 if ( function_exists( 'symlink' ) && !$this->forceCopy ) {
347 symlink( $sourceLoc, $staticLoc );
# Otherwise make a real copy (the branch keyword is not visible in this listing).
349 copy( $sourceLoc, $staticLoc );
353 if ( substr( $rel, 0, 6 ) == 'thumb/' ) {
354 # That was a thumbnail
355 # We will also copy the real image
356 $parts = explode( '/', $rel );
# Drop the leading "thumb" component and keep the next three path components.
357 $rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}";
358 $sourceLoc = "$wgSharedUploadDirectory/$rel";
359 $staticLoc = "{$this->sharedStaticPath}/$rel";
360 #print "Copying $sourceLoc to $staticLoc\n";
# Same create-directory / symlink-or-copy sequence as for the thumbnail above.
361 if ( !file_exists( $staticLoc ) ) {
362 wfMkdirParents( dirname( $staticLoc ), 0755 );
363 if ( function_exists( 'symlink' ) && !$this->forceCopy ) {
364 symlink( $sourceLoc, $staticLoc );
366 copy( $sourceLoc, $staticLoc );
375 /** XML parser callback */
376 function wfDumpStartTagHandler( $parser, $name, $attribs ) {
# Start-tag handler registered by findImages().
#   $parser   the XML parser invoking the handler (unused)
#   $name     element name
#   $attribs  attribute name => value array of the element
# Results are accumulated in the $wgDumpImages global.
377 global $wgDumpImages;
# Element and attribute names are compared in upper case.
# NOTE(review): this relies on the parser reporting names upper-cased; findImages()
# does not change any parser option -- confirm against the XML parser defaults.
379 if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) {
# The URL is stored as the array key, so a repeated image is recorded only once.
380 $wgDumpImages[$attribs['SRC']] = true;
384 /** XML parser callback: end-tag handler registered by findImages(); deliberately empty because only start tags carry the attributes of interest */
385 function wfDumpEndTagHandler( $parser, $name ) {}