* Adding name for ilo (Ilokano)
[mediawiki.git] / maintenance / dumpHTML.inc
blobd508e387a088eac90b64378f43005919af0190a4
1 <?php
2 /**
3  * @package MediaWiki
4  * @subpackage Maintenance
5  */
# Progress messages are printed once every REPORTING_INTERVAL processed items
define( 'REPORTING_INTERVAL', 10 );
9 require_once( 'includes/ImagePage.php' );
10 require_once( 'includes/CategoryPage.php' );
/**
 * Settings and operations for writing wiki pages out as static HTML files.
 */
class DumpHTML {
    # Destination directory; dumped files are written beneath it
    var $dest;

    # Show interlanguage links?
    var $interwiki = true;

    # Depth of HTML directory tree; used by setupGlobals() when no explicit
    # depth is passed to it
    var $depth = 3;

    # Directory that commons images are copied into.
    # setupGlobals() overwrites this with "$wgUploadDirectory/shared".
    var $sharedStaticPath;

    # Relative path to image directory
    var $imageRel = 'upload';

    # Copy commons images instead of symlinking
    var $forceCopy = false;

    # Make links assuming the script path is in the same directory as
    # the destination
    var $alternateScriptPath = false;
/**
 * Create a dumper, overriding the default settings.
 *
 * Declared before the old-style constructor below so that PHP versions
 * which recognise both forms pick this one without complaint.
 *
 * @param array $settings Map of property name => value; every entry is
 *                        assigned to the property of the same name.
 */
function __construct( $settings ) {
    foreach ( $settings as $var => $value ) {
        $this->$var = $value;
    }
}

/**
 * Old-style constructor, kept for callers and PHP versions that still
 * rely on a method named after the class. PHP 8 no longer treats such a
 * method as a constructor, which is why __construct() exists above.
 *
 * @param array $settings See __construct()
 */
function DumpHTML( $settings ) {
    $this->__construct( $settings );
}
/**
 * Write every page whose page_id lies in the inclusive range $start..$end.
 * Pages in the category namespace are skipped here; doCategories() writes
 * those. No other namespace is filtered out by this method.
 *
 * @param int       $start First page_id to process
 * @param int|false $end   Last page_id to process; false means the highest
 *                         page_id found in the page table
 */
function doArticles( $start, $end = false ) {
    $fname = 'DumpHTML::doArticles';

    $this->setupGlobals();

    if ( $end === false ) {
        # No upper bound given: ask the database for the largest page_id
        $dbr =& wfGetDB( DB_SLAVE );
        $end = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
    }

    $id = $start;
    while ( $id <= $end ) {
        if ( $id % REPORTING_INTERVAL == 0 ) {
            print "Processing ID: $id\r";
        }

        # IDs with no matching page yield a false-ish title and are ignored
        $title = Title::newFromID( $id );
        if ( $title && $title->getNamespace() != NS_CATEGORY ) {
            $this->doArticle( $title );
        }

        $id++;
    }
    print "\n";
}
/** Write the main page, then the Special:Categories page */
function doSpecials() {
    $this->doMainPage();

    # doMainPage() changes the link depth, so restore the default setup
    $this->setupGlobals();

    print "Special:Categories...";
    $categoriesTitle = Title::makeTitle( NS_SPECIAL, 'Categories' );
    $this->doArticle( $categoriesTitle );
    print "\n";
}
/**
 * Write the main page as index.html in the destination directory.
 *
 * @return bool True when index.html was written, false when the page could
 *              not be rendered or the file could not be opened
 */
function doMainPage() {
    global $wgMakeDumpLinks;

    print "Making index.html  ";

    // Set up globals with no ../../.. in the link URLs
    $this->setupGlobals( 0 );

    // But still use that directory style. Use the configured depth rather
    // than a literal 3, so the value matches what setupGlobals() applies by
    // default when the other pages are written.
    $wgMakeDumpLinks = $this->depth;

    $title = Title::newMainPage();
    $text = $this->getArticleHTML( $title );
    if ( $text === false ) {
        // getArticleHTML() returns false when it is handed a null title
        print "\nCan't render the main page\n";
        return false;
    }

    $file = fopen( "{$this->dest}/index.html", "w" );
    if ( !$file ) {
        print "\nCan't open index.html for writing\n";
        return false;
    }
    fwrite( $file, $text );
    fclose( $file );
    print "\n";
    return true;
}
/**
 * Write image description pages in two passes:
 *  1. every row of the image table whose title has no article ID (titles
 *     that do have one are left to doArticles());
 *  2. every file name found in the 256 hashed subdirectories of
 *     $this->sharedStaticPath and of its thumb/ subdirectory.
 */
function doImageDescriptions() {
    $fname = 'DumpHTML::doImageDescriptions';

    # Use the configured depth, as doArticles() and doCategories() do
    $this->setupGlobals();

    /**
     * Dump image description pages that don't have an associated article, but do
     * have a local image
     */
    $dbr =& wfGetDB( DB_SLAVE );
    $res = $dbr->select( 'image', array( 'img_name' ), false, $fname );

    $i = 0;
    print "Writing image description pages for local images\n";
    $num = $dbr->numRows( $res );
    while ( $row = $dbr->fetchObject( $res ) ) {
        if ( !( ++$i % REPORTING_INTERVAL ) ) {
            print "Done $i of $num\r";
        }
        $title = Title::makeTitle( NS_IMAGE, $row->img_name );
        if ( $title->getArticleID() ) {
            // Already done by dumpHTML
            continue;
        }
        $this->doArticle( $title );
    }
    print "\n";

    /**
     * Dump images which only have a real description page on commons
     */
    print "Writing description pages for commons images\n";
    $i = 0;
    for ( $hash = 0; $hash < 256; $hash++ ) {
        $dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );
        $patterns = array(
            "{$this->sharedStaticPath}/$dir/*",
            "{$this->sharedStaticPath}/thumb/$dir/*",
        );

        # Collect names as array keys. The previous "+=" was an array union:
        # on two integer-keyed lists it silently dropped every thumb entry
        # whose index already existed in the first list. Keying by name also
        # prevents dumping the same page twice.
        $names = array();
        foreach ( $patterns as $pattern ) {
            $matches = glob( $pattern );
            if ( !is_array( $matches ) ) {
                # glob() returns false on error
                continue;
            }
            foreach ( $matches as $path ) {
                $names[basename( $path )] = true;
            }
        }

        foreach ( array_keys( $names ) as $file ) {
            if ( !( ++$i % REPORTING_INTERVAL ) ) {
                print "$i\r";
            }

            # Cast back to string: purely numeric names become integer keys
            $title = Title::makeTitle( NS_IMAGE, (string)$file );
            $this->doArticle( $title );
        }
    }
    print "\n";
}
/**
 * Write a category page for every distinct cl_to value found in the
 * categorylinks table.
 */
function doCategories() {
    $fname = 'DumpHTML::doCategories';
    $this->setupGlobals();

    $dbr =& wfGetDB( DB_SLAVE );
    $categorylinks = $dbr->tableName( 'categorylinks' );
    print "Selecting categories...";
    # Interpolate the name returned by tableName(); the query previously used
    # the bare literal "categorylinks" in a single-quoted string, leaving the
    # resolved name unused.
    $sql = "SELECT DISTINCT cl_to FROM $categorylinks";
    $res = $dbr->query( $sql, $fname );

    print "\nWriting " . $dbr->numRows( $res ).  " category pages\n";
    $i = 0;
    while ( $row = $dbr->fetchObject( $res ) ) {
        if ( !(++$i % REPORTING_INTERVAL ) ) {
            print "$i\r";
        }
        $title = Title::makeTitle( NS_CATEGORY, $row->cl_to );
        $this->doArticle( $title );
    }
    print "\n";
}
/**
 * Render the page for the given title, copy the images it references and
 * write the HTML to its file. Does nothing when rendering returns false.
 *
 * @param Title $title Page to write
 */
function doArticle( $title ) {
    global $wgTitle, $wgSharedUploadPath, $wgSharedUploadDirectory, $wgUploadDirectory;

    $html = $this->getArticleHTML( $title );
    if ( $html !== false ) {
        # Parse the XHTML to find the images, then copy them
        $this->copyImages( $this->findImages( $html ) );

        # Write to file
        $this->writeArticle( $title, $html );
    }
}
/**
 * Write $text to the file that the title's hashed filename maps to under
 * the destination directory, creating parent directories first.
 * A message is printed when the file cannot be opened.
 *
 * @param Title  $title Title object identifying the target file
 * @param string $text  Content to write
 */
function writeArticle( &$title, $text ) {
    $path = $this->dest . '/' . $title->getHashedFilename();

    wfMkdirParents( dirname( $path ), 0755 );

    $handle = fopen( $path, 'w' );
    if ( $handle ) {
        fwrite( $handle, $text );
        fclose( $handle );
    } else {
        print("Can't open file $path for writing\n");
    }
}
/**
 * Set up globals required for parsing.
 *
 * Rewrites the path globals to relative paths suitable for the dump,
 * installs a fresh user with the 'htmldump' skin, and points
 * $this->sharedStaticPath at "$wgUploadDirectory/shared".
 * May be called repeatedly.
 *
 * @param int|null $depth Value for $wgMakeDumpLinks, also the number of
 *                        "../" levels in generated paths; NULL uses
 *                        $this->depth
 */
function setupGlobals( $depth = NULL ) {
    global $wgUser, $wgTitle, $wgMakeDumpLinks, $wgStylePath, $wgArticlePath;
    global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
    global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
    global $wgSharedThumbnailScriptPath, $wgEnableParserCache;

    # Values as they were before the first call rewrote them
    static $oldLogo = NULL;
    static $oldUploadPath = NULL;

    if ( is_null( $depth ) ) {
        $wgMakeDumpLinks = $this->depth;
    } else {
        $wgMakeDumpLinks = $depth;
    }

    # NOTE(review): $wgScriptPath is not declared global in this method, so
    # it is a local variable here - confirm that this is intended.
    if ( $this->alternateScriptPath ) {
        if ( $wgMakeDumpLinks == 0 ) {
            $wgScriptPath = '.';
        } else {
            $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks - 1 );
        }
    } else {
        $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks );
    }

    $wgArticlePath = str_repeat( '../', $wgMakeDumpLinks ) . '$1';

    # Logo image
    # Allow for repeated setup
    if ( !is_null( $oldLogo ) ) {
        $wgLogo = $oldLogo;
    } else {
        $oldLogo = $wgLogo;
    }
    # The logo must be compared against the upload path it was written
    # for. $wgUploadPath itself is overwritten below, so on a repeated call
    # it no longer matches the restored logo.
    if ( is_null( $oldUploadPath ) ) {
        $oldUploadPath = $wgUploadPath;
    }

    if ( strpos( $wgLogo, $oldUploadPath ) === 0 ) {
        # If it's in the upload directory, rewrite it to the new upload directory
        $wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $oldUploadPath ) + 1 );
    } elseif ( substr( $wgLogo, 0, 1 ) == '/' ) {
        # This is basically heuristic
        # Rewrite an absolute logo path to one relative to the script path.
        # substr() is used instead of $wgLogo{0}: it copes with an empty
        # string and does not depend on the curly-brace offset syntax.
        $wgLogo = $wgScriptPath . $wgLogo;
    }

    $wgStylePath = "$wgScriptPath/skins";
    $wgUploadPath = "$wgScriptPath/{$this->imageRel}";
    $wgSharedUploadPath = "$wgUploadPath/shared";
    $wgMaxCredits = -1;
    # Previously misspelled as $wgHideInterlangageLinks, which only set an
    # unused local and left the declared global untouched.
    $wgHideInterlanguageLinks = !$this->interwiki;
    $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false;
    $wgEnableParserCache = false;

    $wgUser = new User;
    $wgUser->setOption( 'skin', 'htmldump' );
    $wgUser->setOption( 'editsection', 0 );

    $this->sharedStaticPath = "$wgUploadDirectory/shared";
}
/**
 * Run the page behind a title object through the skin and capture the
 * output.
 *
 * Replaces the globals $wgOut and $wgTitle, and $wgArticle for pages
 * outside the special namespace.
 *
 * @param Title|null $title Page to render
 * @return string|false Captured skin output, or false when $title is null
 */
function getArticleHTML( &$title ) {
    global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic;

    $wgOut = new OutputPage;
    $wgOut->setParserOptions( new ParserOptions );

    $wgTitle = $title;
    if ( is_null( $wgTitle ) ) {
        return false;
    }

    $ns = $wgTitle->getNamespace();
    if ( $ns == NS_SPECIAL ) {
        SpecialPage::executePath( $wgTitle );
    } elseif ( $ns == NS_IMAGE ) {
        $wgArticle = new ImagePage( $wgTitle );
        $wgArticle->view();
    } elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) {
        $wgArticle = new CategoryPage( $wgTitle );
        $wgArticle->view();
    } else {
        $wgArticle = new Article( $wgTitle );
        $wgArticle->view();
    }

    # Let the skin print the page into an output buffer and hand back the
    # buffer contents
    $skin =& $wgUser->getSkin();
    ob_start();
    $skin->outputPage( $wgOut );
    return ob_get_clean();
}
/**
 * Collect the image paths used in an XHTML document.
 *
 * The document is run through an XML parser whose element callbacks are
 * wfDumpStartTagHandler and wfDumpEndTagHandler; the result is whatever
 * they leave in the global $wgDumpImages.
 *
 * @param string $text XHTML document
 * @return array Contents of $wgDumpImages after parsing
 */
function findImages( $text ) {
    global $wgOutputEncoding, $wgDumpImages;

    # Start from an empty result on every call
    $wgDumpImages = array();

    $xml = xml_parser_create( $wgOutputEncoding );
    xml_set_element_handler( $xml, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );
    xml_parse( $xml, $text );
    xml_parser_free( $xml );

    return $wgDumpImages;
}
/**
 * Copy images (or create symlinks) from commons to a static directory.
 * This is necessary even if you intend to distribute all of commons, because
 * the directory contents is used to work out which image description pages
 * are needed.
 *
 * @param array $images Map whose keys are image paths; only keys that
 *                      start with $wgSharedUploadPath are handled
 */
function copyImages( $images ) {
    global $wgSharedUploadPath;

    # Find shared uploads and copy them into the static directory
    $sharedPathLength = strlen( $wgSharedUploadPath );
    foreach ( $images as $image => $dummy ) {
        # Is it shared?
        if ( substr( $image, 0, $sharedPathLength ) != $wgSharedUploadPath ) {
            continue;
        }

        # Reconstruct the path relative to the shared directory
        $rel = substr( $image, $sharedPathLength + 1 ); // +1 for slash
        if ( $rel === false || $rel === '' ) {
            # Nothing follows the shared prefix, so there is no file to copy
            continue;
        }
        $this->copySharedFile( $rel );

        if ( substr( $rel, 0, 6 ) == 'thumb/' ) {
            # That was a thumbnail
            # We will also copy the real image, whose path is segments 1-3
            $parts = explode( '/', $rel );
            if ( count( $parts ) >= 4 ) {
                $this->copySharedFile( "{$parts[1]}/{$parts[2]}/{$parts[3]}" );
            }
        }
    }
}

/**
 * Helper for copyImages(): make one shared file available in the static
 * directory unless it is already there. Creates a symlink when symlink()
 * exists and $this->forceCopy is off, otherwise copies the file.
 *
 * @param string $rel File path relative to the shared upload directory
 */
function copySharedFile( $rel ) {
    global $wgSharedUploadDirectory;

    $sourceLoc = "$wgSharedUploadDirectory/$rel";
    $staticLoc = "{$this->sharedStaticPath}/$rel";
    if ( file_exists( $staticLoc ) ) {
        return;
    }

    wfMkdirParents( dirname( $staticLoc ), 0755 );
    if ( function_exists( 'symlink' ) && !$this->forceCopy ) {
        symlink( $sourceLoc, $staticLoc );
    } else {
        copy( $sourceLoc, $staticLoc );
    }
}
/**
 * XML parser start-tag callback.
 * Records the SRC attribute of each IMG element as a key of the global
 * $wgDumpImages array; every other element is ignored.
 *
 * @param resource $parser  Parser invoking the callback (unused)
 * @param string   $name    Element name
 * @param array    $attribs Element attributes keyed by attribute name
 */
function wfDumpStartTagHandler( $parser, $name, $attribs ) {
    global $wgDumpImages;

    if ( $name != 'IMG' || !isset( $attribs['SRC'] ) ) {
        return;
    }
    $wgDumpImages[$attribs['SRC']] = true;
}
/**
 * XML parser end-tag callback.
 * Does nothing; it is only here because findImages() registers an end-tag
 * handler alongside the start-tag handler.
 */
function wfDumpEndTagHandler( $parser, $name ) {
    # Intentionally empty
}
387 # vim: syn=php