Clarify Postgres text indexing process.
[mediawiki.git] / maintenance / refreshLinks.inc
blob6d68e27782822b1ba825453e627806d3899dacc5
1 <?php
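/**
 * Helper functions for the refreshLinks maintenance script: rebuild the
 * link tables and redirect table entries for existing pages, and delete
 * link-table rows whose source page no longer exists.
 *
 * @file
 * @ingroup Maintenance
 */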
/**
 * Refresh link table (or redirect table) entries for a range of pages.
 *
 * The mode is chosen in this order:
 *  - $oldRedirectsOnly: pages with page_is_redirect=1 that have no row in
 *    the redirect table; each one is passed to fixRedirect().
 *  - $newOnly: pages with page_is_new=1 and page_id >= $start. Note that
 *    $end is not applied in this mode.
 *  - otherwise: every page_id from $start up to $end, where $end defaults
 *    to max(page_id) when it is 0.
 *
 * Progress is printed, and wfWaitForSlaves( $maxLag ) is called, every
 * $reportingInterval pages.
 *
 * @param $start int First page_id to process
 * @param $newOnly bool Only process pages flagged as new
 * @param $maxLag mixed Value handed to wfWaitForSlaves()
 * @param $end int Last page_id to process, 0 for no upper bound
 * @param $redirectsOnly bool Call fixRedirect() instead of fixLinksFromArticle()
 * @param $oldRedirectsOnly bool Only fix redirect pages missing from the redirect table
 */
function refreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
	global $wgUser, $wgParser, $wgUseTidy;

	$reportingInterval = 100;
	$fname = 'refreshLinks';
	$dbr = wfGetDB( DB_SLAVE );

	# Both bounds are interpolated into SQL below, so force them to integers
	$start = intval( $start );
	$end = intval( $end );

	# Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
	$wgUser->setOption('math', MW_MATH_SOURCE);

	# Don't generate extension images (e.g. Timeline)
	$wgParser->clearTagHooks();

	# Don't use HTML tidy
	$wgUseTidy = false;

	$what = $redirectsOnly ? "redirects" : "links";

	if( $oldRedirectsOnly ) {
		# Same loop as the $newOnly path below, but selecting redirect pages
		# that have no corresponding row in the redirect table.
		$res = $dbr->query(
			"SELECT page_id ".
			"FROM page ".
			"LEFT JOIN redirect ON page_id=rd_from ".
			"WHERE page_is_redirect=1 AND rd_from IS NULL AND ".
			($end == 0 ? "page_id >= $start"
			           : "page_id BETWEEN $start AND $end"),
			$fname
		);
		$num = $dbr->numRows( $res );
		print "Refreshing $num old redirects from $start...\n";

		# Initialise the progress counter explicitly; it was previously
		# incremented while still undefined.
		$i = 0;
		while( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % $reportingInterval ) ) {
				print "$i\n";
				wfWaitForSlaves( $maxLag );
			}
			fixRedirect( $row->page_id );
		}
	} elseif( $newOnly ) {
		print "Refreshing $what from ";
		$res = $dbr->select( 'page',
			array( 'page_id' ),
			array(
				'page_is_new' => 1,
				"page_id >= $start" ),
			$fname
		);
		$num = $dbr->numRows( $res );
		print "$num new articles...\n";

		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % $reportingInterval ) ) {
				print "$i\n";
				wfWaitForSlaves( $maxLag );
			}
			if ( $redirectsOnly ) {
				fixRedirect( $row->page_id );
			} else {
				fixLinksFromArticle( $row->page_id );
			}
		}
	} else {
		print "Refreshing $what table.\n";
		if ( !$end ) {
			$end = $dbr->selectField( 'page', 'max(page_id)', false );
		}
		print("Starting from page_id $start of $end.\n");

		# Walk every id in the range; ids with no page are skipped inside
		# fixRedirect() / fixLinksFromArticle().
		for ($id = $start; $id <= $end; $id++) {

			if ( !($id % $reportingInterval) ) {
				print "$id\n";
				wfWaitForSlaves( $maxLag );
			}
			if ( $redirectsOnly ) {
				fixRedirect( $id );
			} else {
				fixLinksFromArticle( $id );
			}
		}
	}
}
/**
 * Update the redirect table entry for a single page.
 *
 * Sets the globals $wgTitle and $wgArticle as a side effect. Does nothing
 * further when no title exists for $id, or when Article::followRedirect()
 * does not return an object.
 *
 * @param $id int page_id of the page to fix
 */
function fixRedirect( $id ){
	global $wgTitle, $wgArticle;

	$wgTitle = Title::newFromID( $id );
	$masterDb = wfGetDB( DB_MASTER );

	if ( $wgTitle === null ) {
		# No page with this id
		return;
	}

	$wgArticle = new Article( $wgTitle );
	$target = $wgArticle->followRedirect();

	# Any non-object result (including false) means there is nothing to record
	if ( is_object( $target ) ) {
		$wgArticle->updateRedirectOn( $masterDb, $target );
	}
}
/**
 * Re-parse the current revision of a page and rebuild its link table rows.
 *
 * Sets the global $wgTitle and clears the LinkCache singleton as side
 * effects. Returns without touching the link tables when no title exists
 * for $id or the title has no revision.
 *
 * @param $id int page_id of the page to refresh
 */
function fixLinksFromArticle( $id ) {
	global $wgTitle, $wgParser;

	$wgTitle = Title::newFromID( $id );
	$dbw = wfGetDB( DB_MASTER );

	$linkCache =& LinkCache::singleton();
	$linkCache->clear();

	if ( is_null( $wgTitle ) ) {
		return;
	}

	$revision = Revision::newFromTitle( $wgTitle );
	if ( !$revision ) {
		return;
	}

	# Open the transaction only once we know there is work to do, so the
	# early returns above can never leave a transaction open on the master.
	$dbw->begin();

	$options = new ParserOptions;
	$parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
	$update = new LinksUpdate( $wgTitle, $parserOutput, false );
	$update->doUpdate();
	$dbw->immediateCommit();
}
/**
 * Delete rows from the link tables whose source page id has no matching
 * row in the page table.
 *
 * Waits for slaves once up front, then processes pagelinks, imagelinks,
 * categorylinks, templatelinks and externallinks in turn, printing the
 * number of affected rows for each.
 *
 * @param $maxLag mixed Value handed to wfWaitForSlaves()
 */
function deleteLinksFromNonexistent( $maxLag = 0 ) {
	wfWaitForSlaves( $maxLag );

	$master = wfGetDB( DB_MASTER );

	# Link table name => column holding the source page id
	$fromColumns = array(
		'pagelinks' => 'pl_from',
		'imagelinks' => 'il_from',
		'categorylinks' => 'cl_from',
		'templatelinks' => 'tl_from',
		'externallinks' => 'el_from',
	);

	$pageTable = $master->tableName( 'page' );

	foreach ( $fromColumns as $linkTable => $fromColumn ) {
		# Block until the master connection is usable again, polling every 10s
		if ( !$master->ping() ) {
			print "DB disconnected, reconnecting...";
			while ( !$master->ping() ) {
				print ".";
				sleep( 10 );
			}
			print "\n";
		}

		$quotedTable = $master->tableName( $linkTable );

		# NOTE(review): "DELETE t FROM t LEFT JOIN ..." looks like MySQL's
		# multi-table DELETE syntax; confirm before relying on it with
		# other database backends.
		$deleteSql = "DELETE $quotedTable FROM $quotedTable LEFT JOIN $pageTable ON page_id=$fromColumn WHERE page_id IS NULL";

		print "Deleting $linkTable from non-existent articles...";
		$master->query( $deleteSql, __FUNCTION__ );
		$fixedRows = $master->affectedRows();
		print " fixed $fixedRows row(s)\n";
	}
}