bug fix in newcategorymagic: first char was alway the one of the category namespace...
[mediawiki.git] / maintenance / convertLinks.inc
blobf56a771f96499ebba3acec81ec399c2d67ceec43
1 <?php
3 function convertLinks() {
4         print "Converting links table to ID-ID...\n";
5         
6         global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
7         global $noKeys, $logPerformance, $fh;
8         
9         $numRows = $tuplesAdded = $numBadLinks = $curRowsRead = 0; #counters etc
10         $totalTuplesInserted = 0; # total tuples INSERTed into links_temp
12         $reportCurReadProgress = true; #whether or not to give progress reports while reading IDs from cur table
13         $curReadReportInterval = 1000; #number of rows between progress reports
15         $reportLinksConvProgress = true; #whether or not to give progress reports during conversion
16         $linksConvInsertInterval = 1000; #number of rows per INSERT
18         $initialRowOffset = 0;
19         #$finalRowOffset = 0; # not used yet; highest row number from links table to process
21         # Overwrite the old links table with the new one.  If this is set to false,
22         # the new table will be left at links_temp.
23         $overwriteLinksTable = true;
25         # Don't create keys, and so allow duplicates in the new links table.
26         # This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?)
27         $noKeys = false;
30         $logPerformance = false; # output performance data to a file
31         $perfLogFilename = "convLinksPerf.txt";
32         #--------------------------------------------------------------------
34         $res = wfQuery( "SELECT COUNT(*) AS count FROM links", DB_WRITE );
35         $row = wfFetchObject($res);
36         $numRows = $row->count;
37         wfFreeResult( $res );
39         if ( $numRows == 0 ) {
40                 print "No rows to convert. Updating schema...\n";
41                 createTempTable();
42         } else {
43                 $res = wfQuery( "SELECT l_from FROM links LIMIT 1", DB_READ );
44                 if ( mysql_field_type( $res, 0 ) == "int" ) {
45                         print "Schema already converted\n";
46                         return;
47                 }               
48                 if ( $logPerformance ) { $fh = fopen ( $perfLogFilename, "w" ); }
49                 $baseTime = $startTime = getMicroTime();
50                 # Create a title -> cur_id map
51                 print "Loading IDs from cur table...\n";
52                 performanceLog ( "Reading $numRows rows from cur table...\n" );
53                 performanceLog ( "rows read vs seconds elapsed:\n" );
54                 wfBufferSQLResults( false );
55                 $res = wfQuery( "SELECT cur_namespace,cur_title,cur_id FROM cur", DB_WRITE );
56                 $ids = array();
58                 while ( $row = wfFetchObject( $res ) ) {
59                         $title = $row->cur_title;
60                         if ( $row->cur_namespace ) {
61                                 $title = $wgLang->getNsText( $row->cur_namespace ) . ":$title";
62                         }
63                         $ids[$title] = $row->cur_id;
64                         $curRowsRead++;
65                         if ($reportCurReadProgress) {
66                                 if (($curRowsRead % $curReadReportInterval) == 0) {
67                                         performanceLog( $curRowsRead . " " . (getMicroTime() - $baseTime) . "\n" );
68                                         print "\t$curRowsRead rows of cur table read.\n";       
69                                 }
70                         }
71                 }
72                 wfFreeResult( $res );
73                 wfBufferSQLResults( true );
74                 print "Finished loading IDs.\n\n";
75                 performanceLog( "Took " . (getMicroTime() - $baseTime) . " seconds to load IDs.\n\n" );
76         #--------------------------------------------------------------------
78                 # Now, step through the links table (in chunks of $linksConvInsertInterval rows),
79                 # convert, and write to the new table.
80                 createTempTable();
81                 performanceLog( "Resetting timer.\n\n" );
82                 $baseTime = getMicroTime();
83                 print "Processing $numRows rows from links table...\n";
84                 performanceLog( "Processing $numRows rows from links table...\n" );
85                 performanceLog( "rows inserted vs seconds elapsed:\n" );
86                 
87                 for ($rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval) {
88                         $sqlRead = "SELECT * FROM links LIMIT $linksConvInsertInterval OFFSET $rowOffset";
89                         $res = wfQuery($sqlRead, DB_READ);
90                         if ( $noKeys ) {
91                                 $sqlWrite = array("INSERT INTO links_temp(l_from,l_to) VALUES ");
92                         } else {
93                                 $sqlWrite = array("INSERT IGNORE INTO links_temp(l_from,l_to) VALUES ");
94                         }
95                         
96                         $tuplesAdded = 0; # no tuples added to INSERT yet
97                         while ( $row = wfFetchObject($res) ) {
98                                 $fromTitle = $row->l_from; 
99                                 if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
100                                         $from = $ids[$fromTitle];
101                                         $to = $row->l_to;
102                                         if ( $tuplesAdded != 0 ) {
103                                                 $sqlWrite[] = ",";
104                                         }
105                                         $sqlWrite[] = "($from,$to)";
106                                         $tuplesAdded++;                         
107                                 } else { # invalid title
108                                         $numBadLinks++;
109                                 }
110                         }
111                         wfFreeResult($res);
112                         #print "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n";
113                         if ( $tuplesAdded != 0  ) {
114                                 if ($reportLinksConvProgress) {
115                                         print "Inserting $tuplesAdded tuples into links_temp...";
116                                 }
117                                 wfQuery( implode("",$sqlWrite) , DB_WRITE );
118                                 $totalTuplesInserted += $tuplesAdded;
119                                 if ($reportLinksConvProgress)
120                                         print " done. Total $totalTuplesInserted tuples inserted.\n";
121                                         performanceLog( $totalTuplesInserted . " " . (getMicroTime() - $baseTime) . "\n"  );
122                         }
123                 }
124                 print "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n";
125                 performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" );
126                 performanceLog( "Total execution time: " . (getMicroTime() - $startTime) . " seconds.\n" );
127                 if ( $logPerformance ) { fclose ( $fh ); }
128         }
129         #--------------------------------------------------------------------
131         if ( $overwriteLinksTable ) {
132                 $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
133                 if (!($dbConn->isOpen())) {
134                         print "Opening connection to database failed.\n";
135                         return;
136                 }
137                 # Check for existing links_backup, and delete it if it exists.
138                 print "Dropping backup links table if it exists...";
139                 $dbConn->query( "DROP TABLE IF EXISTS links_backup", DB_WRITE);
140                 print " done.\n";
141                 
142                 # Swap in the new table, and move old links table to links_backup
143                 print "Swapping tables 'links' to 'links_backup'; 'links_temp' to 'links'...";
144                 $dbConn->query( "RENAME TABLE links TO links_backup, links_temp TO links", DB_WRITE );
145                 print " done.\n\n";
146                 
147                 $dbConn->close();
148                 print "Conversion complete. The old table remains at links_backup;\n";
149                 print "delete at your leisure.\n";
150         } else {
151                 print "Conversion complete.  The converted table is at links_temp;\n";
152                 print "the original links table is unchanged.\n";
153         }
156 #--------------------------------------------------------------------
158 function createTempTable() {
159         global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
160         global $noKeys;
161         $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
162         
163         if (!($dbConn->isOpen())) {
164                 print "Opening connection to database failed.\n";
165                 return;
166         }
167         
168         print "Dropping temporary links table if it exists...";
169         $dbConn->query( "DROP TABLE IF EXISTS links_temp", DB_WRITE);
170         print " done.\n";
171         
172         print "Creating temporary links table...";
173         if ( $noKeys ) {
174                 $dbConn->query( "CREATE TABLE links_temp ( " .
175                 "l_from int(8) unsigned NOT NULL default '0', " .
176                 "l_to int(8) unsigned NOT NULL default '0')", DB_WRITE);
177         } else {
178                 $dbConn->query( "CREATE TABLE links_temp ( " .
179                 "l_from int(8) unsigned NOT NULL default '0', " .
180                 "l_to int(8) unsigned NOT NULL default '0', " .
181                 "UNIQUE KEY l_from(l_from,l_to), " .
182                 "KEY (l_to))", DB_WRITE);
183         }
184         print " done.\n\n";
187 function performanceLog( $text ) {
188         global $logPerformance, $fh;
189         if ( $logPerformance ) {
190                 fwrite( $fh, $text );
191         }
194 function getMicroTime() { # return time in seconds, with microsecond accuracy
195         list($usec, $sec) = explode(" ", microtime());
196         return ((float)$usec + (float)$sec);