* (bug 22748) Add anchors on Special:ListGroupRights
[mediawiki.git] / maintenance / convertLinks.inc
blob7c7b8aff2fe18b6e6d5249131c1f3bc1eaf9ec61
1 <?php
2 /**
3  * @file
4  * @todo document
5  * @ingroup Maintenance
6  */
/**
 * Convert the links table from the old title-keyed layout to the ID-to-ID layout.
 *
 * Steps:
 *  - build a "Namespace:Title" => cur_id map from the cur table;
 *  - copy every links row whose l_from title is present in that map into
 *    links_temp, with l_from replaced by the page ID (rows whose title is
 *    not found are counted as bad links and skipped);
 *  - when $overwriteLinksTable is set, rename links to links_backup and
 *    links_temp to links.
 *
 * Nothing is done on Postgres, or when the l_from column of the links table
 * already reports the field type "int".
 *
 * Progress goes to wfOut(); timing data additionally goes through
 * performanceLog() when $logPerformance is enabled.
 */
function convertLinks() {
	global $wgDBtype;
	if ( $wgDBtype == 'postgres' ) {
		wfOut( "Links table already ok on Postgres.\n" );
		return;
	}

	wfOut( "Converting links table to ID-ID...\n" );

	global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
	global $noKeys, $logPerformance, $fh;

	$numBadLinks = $curRowsRead = 0; # counters
	$totalTuplesInserted = 0; # total tuples INSERTed into links_temp

	$reportCurReadProgress = true; # whether to give progress reports while reading IDs from cur table
	$curReadReportInterval = 1000; # number of rows between progress reports

	$reportLinksConvProgress = true; # whether to give progress reports during conversion
	$linksConvInsertInterval = 1000; # number of rows per INSERT

	$initialRowOffset = 0;

	# Overwrite the old links table with the new one.  If this is set to false,
	# the new table will be left at links_temp.
	$overwriteLinksTable = true;

	# Don't create keys, and so allow duplicates in the new links table.
	# This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?)
	# This is a global because createTempTable() reads it too.
	$noKeys = false;

	$logPerformance = false; # output performance data to a file
	$perfLogFilename = "convLinksPerf.txt";
	#--------------------------------------------------------------------

	$dbw = wfGetDB( DB_MASTER );
	list( $cur, $links, $links_temp, $links_backup ) =
		$dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );

	// Probe one row with a database-agnostic limit clause; if l_from is
	// already an integer column there is nothing left to convert.
	$res = $dbw->query( $dbw->limitResult( "SELECT l_from FROM $links", 1 ) );
	$alreadyConverted = ( $dbw->fieldType( $res, 0 ) == "int" );
	$dbw->freeResult( $res );
	if ( $alreadyConverted ) {
		wfOut( "Schema already converted\n" );
		return;
	}

	$res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
	$row = $dbw->fetchObject( $res );
	$numRows = $row->count;
	$dbw->freeResult( $res );

	if ( $numRows == 0 ) {
		wfOut( "Updating schema (no rows to convert)...\n" );
		createTempTable();
	} else {
		if ( $logPerformance ) {
			$fh = fopen( $perfLogFilename, "w" );
			if ( $fh === false ) {
				# Logging is optional: carry on without it rather than hand an
				# invalid handle to fwrite()/fclose() later on.
				wfOut( "Could not open $perfLogFilename for writing; performance logging disabled.\n" );
				$logPerformance = false;
			}
		}
		$baseTime = $startTime = getMicroTime();

		# Create a title -> cur_id map
		wfOut( "Loading IDs from $cur table...\n" );
		performanceLog( "Reading $numRows rows from cur table...\n" );
		performanceLog( "rows read vs seconds elapsed:\n" );

		# Unbuffered while streaming the whole cur table; buffering is
		# switched back on once the result has been consumed.
		$dbw->bufferResults( false );
		$res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
		$ids = array();

		while ( $row = $dbw->fetchObject( $res ) ) {
			$title = $row->cur_title;
			if ( $row->cur_namespace ) {
				# Rows with a non-zero namespace are keyed by "Namespace:Title"
				$title = $wgLang->getNsText( $row->cur_namespace ) . ":$title";
			}
			$ids[$title] = $row->cur_id;
			$curRowsRead++;
			if ( $reportCurReadProgress && ( $curRowsRead % $curReadReportInterval ) == 0 ) {
				performanceLog( $curRowsRead . " " . ( getMicroTime() - $baseTime ) . "\n" );
				wfOut( "\t$curRowsRead rows of $cur table read.\n" );
			}
		}
		$dbw->freeResult( $res );
		$dbw->bufferResults( true );
		wfOut( "Finished loading IDs.\n\n" );
		performanceLog( "Took " . ( getMicroTime() - $baseTime ) . " seconds to load IDs.\n\n" );
		#--------------------------------------------------------------------

		# Now, step through the links table (in chunks of $linksConvInsertInterval rows),
		# convert, and write to the new table.
		createTempTable();
		performanceLog( "Resetting timer.\n\n" );
		$baseTime = getMicroTime();
		wfOut( "Processing $numRows rows from $links table...\n" );
		performanceLog( "Processing $numRows rows from $links table...\n" );
		performanceLog( "rows inserted vs seconds elapsed:\n" );

		# INSERT IGNORE lets the unique key silently drop duplicates; without
		# keys a plain INSERT is used and duplicates are kept.
		$insertPrefix = ( $noKeys ? "INSERT" : "INSERT IGNORE" ) .
			" INTO $links_temp (l_from,l_to) VALUES ";

		# NOTE(review): the chunks are read with LIMIT/OFFSET and no ORDER BY,
		# so this relies on the server returning rows in the same order for
		# every chunk query -- confirm that holds for the storage engine in use.
		for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval ) {
			$sqlRead = $dbw->limitResult( "SELECT * FROM $links ", $linksConvInsertInterval, $rowOffset );
			$res = $dbw->query( $sqlRead );

			$tuples = array(); # "(from,to)" value groups for this chunk's INSERT
			while ( $row = $dbw->fetchObject( $res ) ) {
				$fromTitle = $row->l_from;
				if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
					$from = $ids[$fromTitle];
					$to = $row->l_to;
					$tuples[] = "($from,$to)";
				} else { # invalid title
					$numBadLinks++;
				}
			}
			$dbw->freeResult( $res );

			$tuplesAdded = count( $tuples );
			if ( $tuplesAdded != 0 ) {
				if ( $reportLinksConvProgress ) {
					wfOut( "Inserting $tuplesAdded tuples into $links_temp..." );
				}
				$dbw->query( $insertPrefix . implode( ",", $tuples ) );
				$totalTuplesInserted += $tuplesAdded;
				if ( $reportLinksConvProgress ) {
					wfOut( " done. Total $totalTuplesInserted tuples inserted.\n" );
				}
				# Logged for every chunk, independent of the progress-report switch.
				performanceLog( $totalTuplesInserted . " " . ( getMicroTime() - $baseTime ) . "\n" );
			}
		}
		wfOut( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n" );
		performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" );
		performanceLog( "Total execution time: " . ( getMicroTime() - $startTime ) . " seconds.\n" );
		if ( $logPerformance ) {
			fclose( $fh );
		}
	}
	#--------------------------------------------------------------------

	if ( $overwriteLinksTable ) {
		# The drop/rename is done over a separate connection opened with the
		# admin credentials.
		$dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
		if ( !$dbConn->isOpen() ) {
			wfOut( "Opening connection to database failed.\n" );
			return;
		}

		# Check for existing links_backup, and delete it if it exists.
		wfOut( "Dropping backup links table if it exists..." );
		$dbConn->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
		wfOut( " done.\n" );

		# Swap in the new table, and move old links table to links_backup.
		# All three names come from tableNamesN() so they stay consistent
		# with the names used by every other statement in this function.
		wfOut( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
		$dbConn->query( "RENAME TABLE $links TO $links_backup, $links_temp TO $links", __METHOD__ );
		wfOut( " done.\n\n" );

		$dbConn->close();
		wfOut( "Conversion complete. The old table remains at $links_backup;\n" );
		wfOut( "delete at your leisure.\n" );
	} else {
		wfOut( "Conversion complete.  The converted table is at $links_temp;\n" );
		wfOut( "the original links table is unchanged.\n" );
	}
}
176 #--------------------------------------------------------------------
/**
 * (Re)create the empty links_temp table that receives the converted rows.
 *
 * Opens its own connection using the admin credentials, drops any existing
 * links_temp table and creates a fresh one. Unless the global $noKeys is
 * set, the new table gets a unique key on (l_from,l_to) and a key on l_to.
 *
 * @return bool False if the database connection could not be opened,
 *   true once the table has been created.
 */
function createTempTable() {
	global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
	global $noKeys;

	$dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
	if ( !$dbConn->isOpen() ) {
		wfOut( "Opening connection to database failed.\n" );
		return false;
	}
	$links_temp = $dbConn->tableName( 'links_temp' );

	wfOut( "Dropping temporary links table if it exists..." );
	$dbConn->query( "DROP TABLE IF EXISTS $links_temp" );
	wfOut( " done.\n" );

	wfOut( "Creating temporary links table..." );
	$columns = "l_from int(8) unsigned NOT NULL default '0', " .
		"l_to int(8) unsigned NOT NULL default '0'";
	# Keys are optional: leaving them out allows duplicate rows in the table.
	$keys = $noKeys ? "" : ", UNIQUE KEY l_from(l_from,l_to), KEY (l_to)";
	$dbConn->query( "CREATE TABLE $links_temp ( " . $columns . $keys . ")" );
	wfOut( " done.\n\n" );

	# This connection was opened only for the statements above.
	$dbConn->close();
	return true;
}
/**
 * Write text to the performance log, if performance logging is active.
 *
 * Does nothing unless the global $logPerformance is true and the global $fh
 * holds an open stream, so a log file that failed to open (or was already
 * closed) is skipped instead of being passed to fwrite().
 *
 * @param $text String: text to write to the log
 */
function performanceLog( $text ) {
	global $logPerformance, $fh;
	if ( $logPerformance && is_resource( $fh ) ) {
		fwrite( $fh, $text );
	}
}
/**
 * Get the current Unix time in seconds, with microsecond accuracy.
 *
 * @return float Seconds since the Unix epoch, including the fractional part
 */
function getMicroTime() {
	# microtime( true ) returns the float directly, so the "usec sec" string
	# does not have to be split and summed by hand.
	return microtime( true );
}