* Add an h2 header to the beginning of the results
[mediawiki.git] / maintenance / populateParentId.inc
blob3fecc63b39980b8d9023f37f4bdc5aab151244e0
1 <?php
3 define( 'BATCH_SIZE', 200 ); # Width of the rev_id range handled per loop iteration in populate_rev_parent_id()
5 function populate_rev_parent_id( $db ) {
6         echo "Populating rev_parent_id column\n";
7         $start = $db->selectField( 'revision', 'MIN(rev_id)', false, __FUNCTION__ );
8         $end = $db->selectField( 'revision', 'MAX(rev_id)', false, __FUNCTION__ );
9         if( is_null( $start ) || is_null( $end ) ){
10                 echo "...revision table seems to be empty.\n";
11                 $db->insert( 'updatelog',
12                         array( 'ul_key' => 'populate rev_parent_id' ),
13                         __FUNCTION__,
14                         'IGNORE' );
15                 return;
16         }
17         # Do remaining chunk
18         $end += BATCH_SIZE - 1;
19         $blockStart = $start;
20         $blockEnd = $start + BATCH_SIZE - 1;
21         $count = 0;
22         $changed = 0;
23         while( $blockEnd <= $end ) {
24                 echo "...doing rev_id from $blockStart to $blockEnd\n";
25                 $cond = "rev_id BETWEEN $blockStart AND $blockEnd";
26                 $res = $db->select( 'revision', 
27                         array('rev_id','rev_page','rev_timestamp','rev_parent_id'), 
28                         $cond, __FUNCTION__ );
29                 # Go through and update rev_parent_id from these rows.
30                 # Assume that the previous revision of the title was
31                 # the original previous revision of the title when the
32                 # edit was made...
33                 foreach( $res as $row ) {
34                         # First, check rows with the same timestamp other than this one
35                         # with a smaller rev ID. The highest ID "wins". This avoids loops
36                         # as timestamp can only decrease and never loops with IDs (from parent to parent)
37                         $previousID = $db->selectField( 'revision', 'rev_id', 
38                                 array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $row->rev_timestamp,
39                                         "rev_id < {$row->rev_id}" ), 
40                                 __FUNCTION__,
41                                 array( 'ORDER BY' => 'rev_id DESC' ) );
42                         # If there are none, check the highest ID with a lower timestamp
43                         if( !$previousID ) {
44                                 # Get the highest older timestamp
45                                 $lastTimestamp = $db->selectField( 'revision', 'rev_timestamp', 
46                                         array( 'rev_page' => $row->rev_page, "rev_timestamp < '{$row->rev_timestamp}'" ), 
47                                         __FUNCTION__,
48                                         array( 'ORDER BY' => 'rev_timestamp DESC' ) );
49                                 # If there is one, let the highest rev ID win
50                                 if( $lastTimestamp ) {
51                                         $previousID = $db->selectField( 'revision', 'rev_id', 
52                                                 array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $lastTimestamp ), 
53                                                 __FUNCTION__,
54                                                 array( 'ORDER BY' => 'rev_id DESC' ) );
55                                 }
56                         }
57                         $previousID = intval($previousID);
58                         if( $previousID != $row->rev_parent_id )
59                                 $changed++;
60                         # Update the row...
61                         $db->update( 'revision',
62                                 array( 'rev_parent_id' => $previousID ),
63                                 array( 'rev_id' => $row->rev_id ),
64                                 __FUNCTION__ );
65                         $count++;
66                 }
67                 $blockStart += BATCH_SIZE - 1;
68                 $blockEnd += BATCH_SIZE - 1;
69                 wfWaitForSlaves( 5 );
70         }
71         $logged = $db->insert( 'updatelog',
72                 array( 'ul_key' => 'populate rev_parent_id' ),
73                 __FUNCTION__,
74                 'IGNORE' );
75         if( $logged ) {
76                 echo "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n";
77                 return true;
78         } else {
79                 echo "Could not insert rev_parent_id population row.\n";
80                 return false;
81         }