* Add a h2 header to the begin of the results
[mediawiki.git] / maintenance / convertLinks.inc
blobe4deb22be7d9777755f782a4bfe9148c58dfc095
1 <?php
2 /**
3  * @file
4  * @todo document
5  * @ingroup Maintenance
6  */
8 /** */
9 function convertLinks() {
10         global $wgDBtype;
11         if( $wgDBtype == 'postgres' ) {
12                 print "Links table already ok on Postgres.\n";
13                 return;
14         }
16         print "Converting links table to ID-ID...\n";
18         global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
19         global $noKeys, $logPerformance, $fh;
21         $tuplesAdded = $numBadLinks = $curRowsRead = 0; #counters etc
22         $totalTuplesInserted = 0; # total tuples INSERTed into links_temp
24         $reportCurReadProgress = true; #whether or not to give progress reports while reading IDs from cur table
25         $curReadReportInterval = 1000; #number of rows between progress reports
27         $reportLinksConvProgress = true; #whether or not to give progress reports during conversion
28         $linksConvInsertInterval = 1000; #number of rows per INSERT
30         $initialRowOffset = 0;
31         #$finalRowOffset = 0; # not used yet; highest row number from links table to process
33         # Overwrite the old links table with the new one.  If this is set to false,
34         # the new table will be left at links_temp.
35         $overwriteLinksTable = true;
37         # Don't create keys, and so allow duplicates in the new links table.
38         # This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?)
39         $noKeys = false;
42         $logPerformance = false; # output performance data to a file
43         $perfLogFilename = "convLinksPerf.txt";
44         #--------------------------------------------------------------------
46         $dbw = wfGetDB( DB_MASTER );
47         list ($cur, $links, $links_temp, $links_backup) = $dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );
49         $res = $dbw->query( "SELECT l_from FROM $links LIMIT 1" );
50         if ( $dbw->fieldType( $res, 0 ) == "int" ) {
51                 print "Schema already converted\n";
52                 return;
53         }
55         $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
56         $row = $dbw->fetchObject($res);
57         $numRows = $row->count;
58         $dbw->freeResult( $res );
60         if ( $numRows == 0 ) {
61                 print "Updating schema (no rows to convert)...\n";
62                 createTempTable();
63         } else {
64                 if ( $logPerformance ) { $fh = fopen ( $perfLogFilename, "w" ); }
65                 $baseTime = $startTime = getMicroTime();
66                 # Create a title -> cur_id map
67                 print "Loading IDs from $cur table...\n";
68                 performanceLog ( "Reading $numRows rows from cur table...\n" );
69                 performanceLog ( "rows read vs seconds elapsed:\n" );
71                 $dbw->bufferResults( false );
72                 $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
73                 $ids = array();
75                 while ( $row = $dbw->fetchObject( $res ) ) {
76                         $title = $row->cur_title;
77                         if ( $row->cur_namespace ) {
78                                 $title = $wgLang->getNsText( $row->cur_namespace ) . ":$title";
79                         }
80                         $ids[$title] = $row->cur_id;
81                         $curRowsRead++;
82                         if ($reportCurReadProgress) {
83                                 if (($curRowsRead % $curReadReportInterval) == 0) {
84                                         performanceLog( $curRowsRead . " " . (getMicroTime() - $baseTime) . "\n" );
85                                         print "\t$curRowsRead rows of $cur table read.\n";
86                                 }
87                         }
88                 }
89                 $dbw->freeResult( $res );
90                 $dbw->bufferResults( true );
91                 print "Finished loading IDs.\n\n";
92                 performanceLog( "Took " . (getMicroTime() - $baseTime) . " seconds to load IDs.\n\n" );
93         #--------------------------------------------------------------------
95                 # Now, step through the links table (in chunks of $linksConvInsertInterval rows),
96                 # convert, and write to the new table.
97                 createTempTable();
98                 performanceLog( "Resetting timer.\n\n" );
99                 $baseTime = getMicroTime();
100                 print "Processing $numRows rows from $links table...\n";
101                 performanceLog( "Processing $numRows rows from $links table...\n" );
102                 performanceLog( "rows inserted vs seconds elapsed:\n" );
104                 for ($rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval) {
105                         $sqlRead = "SELECT * FROM $links ";
106                         $sqlRead = $dbw->limitResult($sqlRead, $linksConvInsertInterval,$rowOffset);
107                         $res = $dbw->query($sqlRead);
108                         if ( $noKeys ) {
109                                 $sqlWrite = array("INSERT INTO $links_temp (l_from,l_to) VALUES ");
110                         } else {
111                                 $sqlWrite = array("INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES ");
112                         }
114                         $tuplesAdded = 0; # no tuples added to INSERT yet
115                         while ( $row = $dbw->fetchObject($res) ) {
116                                 $fromTitle = $row->l_from;
117                                 if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
118                                         $from = $ids[$fromTitle];
119                                         $to = $row->l_to;
120                                         if ( $tuplesAdded != 0 ) {
121                                                 $sqlWrite[] = ",";
122                                         }
123                                         $sqlWrite[] = "($from,$to)";
124                                         $tuplesAdded++;
125                                 } else { # invalid title
126                                         $numBadLinks++;
127                                 }
128                         }
129                         $dbw->freeResult($res);
130                         #print "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n";
131                         if ( $tuplesAdded != 0  ) {
132                                 if ($reportLinksConvProgress) {
133                                         print "Inserting $tuplesAdded tuples into $links_temp...";
134                                 }
135                                 $dbw->query( implode("",$sqlWrite) );
136                                 $totalTuplesInserted += $tuplesAdded;
137                                 if ($reportLinksConvProgress)
138                                         print " done. Total $totalTuplesInserted tuples inserted.\n";
139                                         performanceLog( $totalTuplesInserted . " " . (getMicroTime() - $baseTime) . "\n"  );
140                         }
141                 }
142                 print "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n";
143                 performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" );
144                 performanceLog( "Total execution time: " . (getMicroTime() - $startTime) . " seconds.\n" );
145                 if ( $logPerformance ) { fclose ( $fh ); }
146         }
147         #--------------------------------------------------------------------
149         if ( $overwriteLinksTable ) {
150                 $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
151                 if (!($dbConn->isOpen())) {
152                         print "Opening connection to database failed.\n";
153                         return;
154                 }
155                 # Check for existing links_backup, and delete it if it exists.
156                 print "Dropping backup links table if it exists...";
157                 $dbConn->query( "DROP TABLE IF EXISTS $links_backup", DB_MASTER);
158                 print " done.\n";
160                 # Swap in the new table, and move old links table to links_backup
161                 print "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'...";
162                 $dbConn->query( "RENAME TABLE links TO $links_backup, $links_temp TO $links", DB_MASTER );
163                 print " done.\n\n";
165                 $dbConn->close();
166                 print "Conversion complete. The old table remains at $links_backup;\n";
167                 print "delete at your leisure.\n";
168         } else {
169                 print "Conversion complete.  The converted table is at $links_temp;\n";
170                 print "the original links table is unchanged.\n";
171         }
174 #--------------------------------------------------------------------
176 function createTempTable() {
177         global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
178         global $noKeys;
179         $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
181         if (!($dbConn->isOpen())) {
182                 print "Opening connection to database failed.\n";
183                 return;
184         }
185         $links_temp = $dbConn->tableName( 'links_temp' );
187         print "Dropping temporary links table if it exists...";
188         $dbConn->query( "DROP TABLE IF EXISTS $links_temp");
189         print " done.\n";
191         print "Creating temporary links table...";
192         if ( $noKeys ) {
193                 $dbConn->query( "CREATE TABLE $links_temp ( " .
194                 "l_from int(8) unsigned NOT NULL default '0', " .
195                 "l_to int(8) unsigned NOT NULL default '0')");
196         } else {
197                 $dbConn->query( "CREATE TABLE $links_temp ( " .
198                 "l_from int(8) unsigned NOT NULL default '0', " .
199                 "l_to int(8) unsigned NOT NULL default '0', " .
200                 "UNIQUE KEY l_from(l_from,l_to), " .
201                 "KEY (l_to))");
202         }
203         print " done.\n\n";
206 function performanceLog( $text ) {
207         global $logPerformance, $fh;
208         if ( $logPerformance ) {
209                 fwrite( $fh, $text );
210         }
213 function getMicroTime() { # return time in seconds, with microsecond accuracy
214         list($usec, $sec) = explode(" ", microtime());
215         return ((float)$usec + (float)$sec);