Guarding newFromText calls
[mediawiki.git] / maintenance / rebuildlinks.inc
blob8be43362acfd4875e133f23661ec39dd7356394a
<?php

# Functions for rebuilding the link tracking tables; must
# be included within a script that also includes the Setup.
# See rebuildlinks.php, for example.

# Turn this on if you've got memory to burn: when true, pass 1 creates
# its scratch table with TYPE=heap instead of the default table type.
$wgUseMemoryTables = false;

# Pass 1 of the link table rebuild.
#
# Drops and recreates the scratch table `rebuildlinks`, then scans the
# text of every row in `cur` for [[...]] links and records one
# (source page id, source prefixed title, target prefixed DB key) row
# per link that belongs in the link tables. URL-style links, interwiki
# links and links into the Special namespace are skipped; Media links
# are recorded as Image links.
#
# Takes no arguments and returns nothing. Progress and unparseable link
# targets are reported on standard output.
function rebuildLinkTablesPass1()
{
        global $wgLang, $wgUseMemoryTables;
        $count = 0;
        print "Rebuilding link tables (pass 1).\n";

        $sql = "DROP TABLE IF EXISTS rebuildlinks";
        wfQuery( $sql, DB_WRITE );

        $sql = "CREATE TABLE rebuildlinks (
  rl_f_id int(8) unsigned NOT NULL default 0,
  rl_f_title varchar(255) binary NOT NULL default '',
  rl_to varchar(255) binary NOT NULL default '',
  INDEX rl_to (rl_to) )";
        if( $wgUseMemoryTables ) $sql .= " TYPE=heap";
        wfQuery( $sql, DB_WRITE );

        $sql = "LOCK TABLES cur READ, rebuildlinks WRITE, interwiki READ, user_newtalk READ";
        wfQuery( $sql, DB_WRITE );

        # The table was created just above, so this only guards against
        # leftover rows.
        $sql = "DELETE FROM rebuildlinks";
        wfQuery( $sql, DB_WRITE );

        $sql = "SELECT cur_id,cur_namespace,cur_title,cur_text FROM cur";
        $res = wfQuery( $sql, DB_WRITE );
        $total = wfNumRows( $res );

        $tc = Title::legalChars();
        while ( $row = wfFetchObject( $res ) ) {
                $id = $row->cur_id;

                # Prefixed name of the page being scanned; main-namespace
                # pages have an empty namespace text and get no prefix.
                $ns = $wgLang->getNsText( $row->cur_namespace );
                if ( "" == $ns ) {
                        $fullname = $row->cur_title;
                } else {
                        $fullname = "$ns:{$row->cur_title}";
                }
                $title = addslashes( $fullname );

                # $m[1] receives the link targets: everything between "[["
                # and the first "]" or "|".
                $numlinks = preg_match_all( "/\\[\\[([{$tc}]+)(]|\\|)/", $row->cur_text,
                  $m, PREG_PATTERN_ORDER );

                if ( 0 != $numlinks ) {
                        # One "(id,'from','to')" tuple per link worth storing
                        $values = array();
                        for ( $i = 0; $i < $numlinks; ++$i ) {
                                $target = $m[1][$i];
                                if( preg_match( '/^(http|https|ftp|mailto|news):/', $target ) ) {
                                        # an URL link; not for us!
                                        continue;
                                }
                                # FIXME: Handle subpage links
                                $nt = Title::newFromText( $target );
                                if ( ! $nt ) {
                                        # Title could not be built from this text;
                                        # report it against the real page name.
                                        print "error in '$fullname' :\t'$target'\n";
                                        continue;
                                }
                                if( $nt->getInterwiki() != "" ) {
                                        # Interwiki links are not stored in the link tables
                                        continue;
                                }
                                if( $nt->getNamespace() == Namespace::getSpecial() ) {
                                        # Special links not stored in link tables
                                        continue;
                                }
                                if( $nt->getNamespace() == Namespace::getMedia() ) {
                                        # treat media: links as image: links
                                        $nt = Title::makeTitle( Namespace::getImage(), $nt->getDBkey() );
                                }

                                $dest = addslashes( $nt->getPrefixedDBkey() );
                                $values[] = "({$id},'{$title}','{$dest}')";
                        }

                        # All links of one page go in as a single multi-row insert
                        if ( count( $values ) > 0 ) {
                                $sql = "INSERT INTO rebuildlinks (rl_f_id,rl_f_title,rl_to) VALUES " .
                                  implode( ",", $values );
                                wfQuery( $sql, DB_WRITE );
                        }
                }
                if ( ( ++$count % 1000 ) == 0 ) {
                        print "$count of $total articles scanned.\n";
                }
        }
        print "$total articles scanned.\n";
        # Use the same wrapper as pass 2 rather than calling the MySQL
        # extension directly.
        wfFreeResult( $res );

        $sql = "UNLOCK TABLES";
        wfQuery( $sql, DB_WRITE );
}

# Pass 2 of the link table rebuild.
#
# Empties `links`, `brokenlinks` and `imagelinks` and refills them from
# the scratch table `rebuildlinks` written by pass 1:
#  - targets in the Image namespace go to `imagelinks`
#    (source title, image name without the namespace prefix);
#  - other targets whose article id is 0 go to `brokenlinks`
#    (source page id, target title);
#  - all remaining targets go to `links`
#    (source title, target article id).
# The scratch table is dropped at the end.
#
# Takes no arguments and returns nothing. Progress and titles that
# cannot be turned into a Title object are reported on standard output.
function rebuildLinkTablesPass2()
{
        global $wgLang;
        print "Rebuilding link tables (pass 2).\n";

        $sql = "LOCK TABLES cur READ, rebuildlinks READ, interwiki READ, " .
          "links WRITE, brokenlinks WRITE, imagelinks WRITE";
        wfQuery( $sql, DB_WRITE );

        # Start from empty link tables
        foreach ( array( "links", "brokenlinks", "imagelinks" ) as $table ) {
                wfQuery( "DELETE FROM $table", DB_WRITE );
        }

        # --- Image links ---
        $ins = $wgLang->getNsText( Namespace::getImage() );
        $prefix = "$ins:";
        $inslen = strlen( $prefix );
        $sql = "SELECT rl_f_title,rl_to FROM rebuildlinks " .
          "WHERE rl_to LIKE '$ins:%'";
        $res = wfQuery( $sql, DB_WRITE );

        $values = array();
        while ( $row = wfFetchObject( $res ) ) {
                # LIKE treats "_" and "%" in the namespace name as wildcards,
                # so re-check the prefix exactly, the same way the loop over
                # the remaining targets below does.
                if ( 0 != strncmp( $prefix, $row->rl_to, $inslen ) ) { continue; }

                $iname = addslashes( substr( $row->rl_to, $inslen ) );
                $pname = addslashes( $row->rl_f_title );
                $values[] = "('{$pname}','{$iname}')";
        }
        wfFreeResult( $res );
        if ( count( $values ) > 0 ) {
                $sql = "INSERT INTO imagelinks (il_from,il_to) VALUES " .
                  implode( ",", $values );
                wfQuery( $sql, DB_WRITE );
        }

        # --- Ordinary and broken links, one distinct target at a time ---
        $sql = "SELECT DISTINCT rl_to FROM rebuildlinks ORDER BY rl_to";
        $res = wfQuery( $sql, DB_WRITE );
        $count = 0;
        $total = wfNumRows( $res );

        while ( $row = wfFetchObject( $res ) ) {
                # Image targets were already handled above
                if ( 0 == strncmp( $prefix, $row->rl_to, $inslen ) ) { continue; }

                $nt = Title::newFromDBkey( $row->rl_to );
                if ( ! $nt ) {
                        print "error pass2: '{$row->rl_to}'\n";
                        continue;
                }
                $id = $nt->getArticleID();
                $to = addslashes( $row->rl_to );

                $values = array();
                if ( 0 == $id ) {
                        # Article id 0: record every linking page id as a broken link
                        $sql = "SELECT DISTINCT rl_f_id FROM rebuildlinks WHERE rl_to='{$to}'";
                        $res2 = wfQuery( $sql, DB_WRITE );
                        while ( $row2 = wfFetchObject( $res2 ) ) {
                                $from = $row2->rl_f_id;
                                $values[] = "({$from},'{$to}')";
                        }
                        wfFreeResult( $res2 );
                        $insert = "INSERT INTO brokenlinks (bl_from,bl_to) VALUES ";
                } else {
                        # Non-zero article id: record linking page title -> target id
                        $sql = "SELECT DISTINCT rl_f_title FROM rebuildlinks WHERE rl_to='{$to}'";
                        $res2 = wfQuery( $sql, DB_WRITE );
                        while ( $row2 = wfFetchObject( $res2 ) ) {
                                $from = addslashes( $row2->rl_f_title );
                                $values[] = "('{$from}',{$id})";
                        }
                        wfFreeResult( $res2 );
                        $insert = "INSERT INTO links (l_from,l_to) VALUES ";
                }
                if ( count( $values ) > 0 ) {
                        wfQuery( $insert . implode( ",", $values ), DB_WRITE );
                }

                if ( ( ++$count % 1000 ) == 0 ) {
                        print "$count of $total titles processed.\n";
                }
        }
        wfFreeResult( $res );

        $sql = "UNLOCK TABLES";
        wfQuery( $sql, DB_WRITE );

        # The scratch table is no longer needed
        $sql = "DROP TABLE rebuildlinks";
        wfQuery( $sql, DB_WRITE );
}