In QueryPage: fixing a misleading comment. value needn't be numeric.
[mediawiki.git] / maintenance / convertLinks.inc
blob62ec7a2412550f2cf9c997b1fc8dae83278ff99a
1 <?php
3 function convertLinks() {
4         print "Converting links table to ID-ID...\n";
5         
6         global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
7         global $noKeys, $logPerformance, $fh;
8         
9         $numRows = $tuplesAdded = $numBadLinks = $curRowsRead = 0; #counters etc
10         $totalTuplesInserted = 0; # total tuples INSERTed into links_temp
12         $reportCurReadProgress = true; #whether or not to give progress reports while reading IDs from cur table
13         $curReadReportInterval = 1000; #number of rows between progress reports
15         $reportLinksConvProgress = true; #whether or not to give progress reports during conversion
16         $linksConvInsertInterval = 1000; #number of rows per INSERT
18         $initialRowOffset = 0;
19         #$finalRowOffset = 0; # not used yet; highest row number from links table to process
21         # Overwrite the old links table with the new one.  If this is set to false,
22         # the new table will be left at links_temp.
23         $overwriteLinksTable = true;
25         # Don't create keys, and so allow duplicates in the new links table.
26         # This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?)
27         $noKeys = false;
30         $logPerformance = false; # output performance data to a file
31         $perfLogFilename = "convLinksPerf.txt";
32         #--------------------------------------------------------------------
33         
34         $dbw =& wfGetDB( DB_MASTER );
35         extract( $dbw->tableNames( 'cur', 'links', 'links_temp', 'links_backup' ) );
37         $res = $dbw->query( "SELECT l_from FROM $links LIMIT 1" );
38         if ( $dbw->fieldType( $res, 0 ) == "int" ) {
39                 print "Schema already converted\n";
40                 return;
41         }
42         
43         $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
44         $row = $dbw->fetchObject($res);
45         $numRows = $row->count;
46         $dbw->freeResult( $res );
48         if ( $numRows == 0 ) {
49                 print "Updating schema (no rows to convert)...\n";
50                 createTempTable();
51         } else {
52                 if ( $logPerformance ) { $fh = fopen ( $perfLogFilename, "w" ); }
53                 $baseTime = $startTime = getMicroTime();
54                 # Create a title -> cur_id map
55                 print "Loading IDs from $cur table...\n";
56                 performanceLog ( "Reading $numRows rows from cur table...\n" );
57                 performanceLog ( "rows read vs seconds elapsed:\n" );
58                 
59                 $dbw->setBufferResults( false );
60                 $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
61                 $ids = array();
63                 while ( $row = $dbw->fetchObject( $res ) ) {
64                         $title = $row->cur_title;
65                         if ( $row->cur_namespace ) {
66                                 $title = $wgLang->getNsText( $row->cur_namespace ) . ":$title";
67                         }
68                         $ids[$title] = $row->cur_id;
69                         $curRowsRead++;
70                         if ($reportCurReadProgress) {
71                                 if (($curRowsRead % $curReadReportInterval) == 0) {
72                                         performanceLog( $curRowsRead . " " . (getMicroTime() - $baseTime) . "\n" );
73                                         print "\t$curRowsRead rows of $cur table read.\n";      
74                                 }
75                         }
76                 }
77                 $dbw->freeResult( $res );
78                 $dbw->setBufferResults( true );
79                 print "Finished loading IDs.\n\n";
80                 performanceLog( "Took " . (getMicroTime() - $baseTime) . " seconds to load IDs.\n\n" );
81         #--------------------------------------------------------------------
83                 # Now, step through the links table (in chunks of $linksConvInsertInterval rows),
84                 # convert, and write to the new table.
85                 createTempTable();
86                 performanceLog( "Resetting timer.\n\n" );
87                 $baseTime = getMicroTime();
88                 print "Processing $numRows rows from $links table...\n";
89                 performanceLog( "Processing $numRows rows from $links table...\n" );
90                 performanceLog( "rows inserted vs seconds elapsed:\n" );
91                 
92                 for ($rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval) {
93                         $sqlRead = "SELECT * FROM $links ".wfLimitResult($linksConvInsertInterval,$rowOffset);
94                         $res = $dbw->query($sqlRead);
95                         if ( $noKeys ) {
96                                 $sqlWrite = array("INSERT INTO $links_temp (l_from,l_to) VALUES ");
97                         } else {
98                                 $sqlWrite = array("INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES ");
99                         }
100                         
101                         $tuplesAdded = 0; # no tuples added to INSERT yet
102                         while ( $row = $dbw->fetchObject($res) ) {
103                                 $fromTitle = $row->l_from; 
104                                 if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
105                                         $from = $ids[$fromTitle];
106                                         $to = $row->l_to;
107                                         if ( $tuplesAdded != 0 ) {
108                                                 $sqlWrite[] = ",";
109                                         }
110                                         $sqlWrite[] = "($from,$to)";
111                                         $tuplesAdded++;                         
112                                 } else { # invalid title
113                                         $numBadLinks++;
114                                 }
115                         }
116                         $dbw->freeResult($res);
117                         #print "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n";
118                         if ( $tuplesAdded != 0  ) {
119                                 if ($reportLinksConvProgress) {
120                                         print "Inserting $tuplesAdded tuples into $links_temp...";
121                                 }
122                                 $dbw->query( implode("",$sqlWrite) );
123                                 $totalTuplesInserted += $tuplesAdded;
124                                 if ($reportLinksConvProgress)
125                                         print " done. Total $totalTuplesInserted tuples inserted.\n";
126                                         performanceLog( $totalTuplesInserted . " " . (getMicroTime() - $baseTime) . "\n"  );
127                         }
128                 }
129                 print "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n";
130                 performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" );
131                 performanceLog( "Total execution time: " . (getMicroTime() - $startTime) . " seconds.\n" );
132                 if ( $logPerformance ) { fclose ( $fh ); }
133         }
134         #--------------------------------------------------------------------
136         if ( $overwriteLinksTable ) {
137                 $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
138                 if (!($dbConn->isOpen())) {
139                         print "Opening connection to database failed.\n";
140                         return;
141                 }
142                 # Check for existing links_backup, and delete it if it exists.
143                 print "Dropping backup links table if it exists...";
144                 $dbConn->query( "DROP TABLE IF EXISTS $links_backup", DB_MASTER);
145                 print " done.\n";
146                 
147                 # Swap in the new table, and move old links table to links_backup
148                 print "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'...";
149                 $dbConn->query( "RENAME TABLE links TO $links_backup, $links_temp TO $links", DB_MASTER );
150                 print " done.\n\n";
151                 
152                 $dbConn->close();
153                 print "Conversion complete. The old table remains at $links_backup;\n";
154                 print "delete at your leisure.\n";
155         } else {
156                 print "Conversion complete.  The converted table is at $links_temp;\n";
157                 print "the original links table is unchanged.\n";
158         }
161 #--------------------------------------------------------------------
163 function createTempTable() {
164         global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
165         global $noKeys;
166         $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
167         
168         if (!($dbConn->isOpen())) {
169                 print "Opening connection to database failed.\n";
170                 return;
171         }
172         $links_temp = $dbConn->tableName( 'links_temp' );
173         
174         print "Dropping temporary links table if it exists...";
175         $dbConn->query( "DROP TABLE IF EXISTS $links_temp");
176         print " done.\n";
177         
178         print "Creating temporary links table...";
179         if ( $noKeys ) {
180                 $dbConn->query( "CREATE TABLE $links_temp ( " .
181                 "l_from int(8) unsigned NOT NULL default '0', " .
182                 "l_to int(8) unsigned NOT NULL default '0')");
183         } else {
184                 $dbConn->query( "CREATE TABLE $links_temp ( " .
185                 "l_from int(8) unsigned NOT NULL default '0', " .
186                 "l_to int(8) unsigned NOT NULL default '0', " .
187                 "UNIQUE KEY l_from(l_from,l_to), " .
188                 "KEY (l_to))");
189         }
190         print " done.\n\n";
193 function performanceLog( $text ) {
194         global $logPerformance, $fh;
195         if ( $logPerformance ) {
196                 fwrite( $fh, $text );
197         }
200 function getMicroTime() { # return time in seconds, with microsecond accuracy
201         list($usec, $sec) = explode(" ", microtime());
202         return ((float)$usec + (float)$sec);