3 * Rebuild interwiki table using the file on meta and the language list
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
23 * @ingroup Maintenance
29 * @ingroup Maintenance
# Per-site fields. Going by the "db suffix, iw prefix, hostname" note next to the
# Site list further down in this file, these presumably hold the database-name
# suffix, the lateral interwiki prefix and the base hostname -- TODO confirm.
32 var $suffix, $lateral, $url;
# Constructor taking three positional values; the call sites in this file pass
# them as e.g. ( 'wiki', 'w', 'wikipedia.org' ).
# NOTE(review): the constructor body is not visible in this excerpt; it presumably
# copies $s / $l / $u into $suffix / $lateral / $url -- verify.
34 function __construct( $s, $l, $u ) {
# Build the article URL pattern for one language edition of this site.
# $lang: language code; every underscore is replaced by a hyphen before the
#        code is used as the leading host label.
# Returns "http://<lang>.<this site's url>/wiki/$1", where "$1" is a literal
# placeholder left in the string.
function getURL( $lang ) {
	$hostLabel = str_replace( '_', '-', $lang );
	return 'http://' . $hostLabel . '.' . $this->url . '/wiki/$1';
}
46 require_once( dirname( __FILE__
) . '/Maintenance.php' );
48 class RebuildInterwiki
extends Maintenance
{
# Set up the maintenance script: run the parent constructor, set the description
# text, and register the three options ('langlist', 'dblist', 'd') that are read
# back elsewhere in this class through getOption().
49 public function __construct() {
50 parent
::__construct();
51 $this->mDescription
= "Rebuild the interwiki table using the file on meta and the language list.";
# Every option below is registered with the same trailing arguments ( false, true ).
# NOTE(review): presumably "not required" / "takes a value" -- confirm against
# Maintenance::addOption, which is defined outside this file.
52 $this->addOption( 'langlist', 'File with one language code per line', false, true );
53 $this->addOption( 'dblist', 'File with one db per line', false, true );
54 $this->addOption( 'd', 'Output folder', false, true );
# NOTE(review): the header of the method that contains these statements is not
# visible in this excerpt (presumably the script's execute() entry point -- confirm).
# The visible statements load the language list and the database list into
# properties and then call makeInterwikiSQL() with the output directory.
58 # List of language prefixes likely to be found in multi-language sites
# file() returns one element per line and "trim" strips the line endings.
# The second getOption() argument is presumably the fallback used when the
# option was not supplied -- confirm against Maintenance::getOption.
# NOTE(review): the result of file() is passed to array_map() without a failure check.
59 $this->langlist
= array_map( "trim", file( $this->getOption( 'langlist', "/home/wikipedia/common/langlist" ) ) );
61 # List of all database names
62 $this->dblist
= array_map( "trim", file( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) ) );
64 # Special-case databases
# The next line is commented-out code; $this->specials is assigned as a literal
# array inside makeInterwikiSQL() instead.
65 //$this->specials = array_flip( array_map( "trim", file( $this->getOption( 'specialdbs', "/home/wikipedia/common/special.dblist" ) ) ) );
# Generate the SQL files into the directory given by option 'd'.
67 $this->makeInterwikiSQL( $this->getOption( 'd', '/home/wikipedia/conf/interwiki/sql' ) );
# Generate interwiki SQL for the databases listed in $this->dblist and write it
# to "$destDir/<db>.sql".
# Each generated text starts with "-- Generated by rebuildInterwiki.php" and holds
# USE / TRUNCATE TABLE interwiki / INSERT INTO interwiki statements whose value
# rows come from makeLink() and makeLanguageLinks().
# $destDir: directory the .sql files are written into.
# NOTE(review): many lines of this method are not visible in this excerpt (among
# them the assignments of $sites, $extraLinks, $first, $url, $local and $lang and
# most closing braces), so the comments below describe only what can be seen.
70 function makeInterwikiSQL( $destDir ) {
71 $this->output( "Making new interwiki SQL files in $destDir\n" );
73 # Multi-language sites
74 # db suffix => db suffix, iw prefix, hostname
# The entries below are read later in this method as $sites (e.g. $sites['wiki']).
76 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
77 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
78 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
79 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
80 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
81 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
82 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
83 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
86 # Special-case hostnames
# Maps a database name to its hostname; databases found here take the first
# branch of the per-database loop below.
87 $this->specials
= array(
88 'sourceswiki' => 'sources.wikipedia.org',
89 'quotewiki' => 'wikiquote.org',
90 'textbookwiki' => 'wikibooks.org',
91 'sep11wiki' => 'sep11.wikipedia.org',
92 'metawiki' => 'meta.wikimedia.org',
93 'commonswiki' => 'commons.wikimedia.org',
94 'specieswiki' => 'species.wikimedia.org',
97 # Extra interwiki links that can't be in the intermap for some reason
# Positional triples; presumably ( prefix, url, local flag ) and collected in
# $extraLinks, which is iterated further down -- the assignment is not visible.
99 array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
100 array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
101 array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
104 # Language aliases, usually configured as redirects to the real wiki in apache
105 # Interlanguage links are made directly to the real wiki
106 # Something horrible happens if you forget to list an alias here, I can't
# NOTE(review): the sentence above is cut off in this excerpt, and the alias
# entries themselves (alias => language, judging by the loops below) are not visible.
108 $this->languageAliases
= array(
115 # Special case prefix rewrites, for the benefit of Swedish which uses s:t
116 # as an abbreviation for saint
# Keyed by source database, then by original prefix; consumed by makeLink().
117 $this->prefixRewrites
= array(
118 'svwiki' => array( 's' => 'src' ),
121 # Construct a list of reserved prefixes
# Language codes, language aliases and the sites' lateral prefixes are marked
# reserved, so intermap rows using one of them are dropped below.
123 foreach ( $this->langlist
as $lang ) {
124 $reserved[$lang] = 1;
126 foreach ( $this->languageAliases
as $alias => $lang ) {
127 $reserved[$alias] = 1;
129 foreach ( $sites as $site ) {
130 $reserved[$site->lateral
] = 1;
133 # Extract the intermap from meta
# Fetches the raw wikitext of the Interwiki_map page; the second argument (30)
# is presumably a timeout in seconds -- confirm against Http::get.
134 $intermap = Http
::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
135 $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );
# Fewer than two lines is treated as "page not found". The second argument to
# error() presumably makes it fatal -- confirm against Maintenance::error.
137 if ( !$lines ||
count( $lines ) < 2 ) {
138 $this->error( "m:Interwiki_map not found", true );
143 foreach ( $lines as $line ) {
# Matches table rows of the form "| prefix || http(s)://url"; capture 1 is the
# prefix and capture 2 the URL.
145 if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(https?:\/\/.*?)\s*$/', $line, $matches ) ) {
146 $prefix = strtolower( $matches[1] );
# Tests whether $url points at one of the listed project domains; the lines that
# use the result (presumably to set $local) are not visible.
# NOTE(review): the pattern does not list wikinews or wikiversity although both
# appear in the Site list above -- confirm whether that is intentional.
148 if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) {
# Only prefixes that are not reserved are kept; entries are keyed by prefix and
# use named keys (iw_prefix / iw_url / iw_local) rather than positions.
154 if ( empty( $reserved[$prefix] ) ) {
155 $iwArray[$prefix] = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
# Build the SQL text for each database in turn.
160 foreach ( $this->dblist
as $db ) {
161 $sql = "-- Generated by rebuildInterwiki.php";
162 if ( isset( $this->specials
[$db] ) ) {
164 # Has interwiki links and interlanguage links to wikipedia
166 $host = $this->specials
[$db];
167 $sql .= "\n--$host\n\n";
168 $sql .= "USE $db;\n" .
169 "TRUNCATE TABLE interwiki;\n" .
170 "INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES \n";
# $first is passed by reference into makeLink(); its initialisation is not visible here.
174 foreach ( $iwArray as $iwEntry ) {
175 $sql .= $this->makeLink( $iwEntry, $first, $db );
178 # Links to multilanguage sites
# Special databases link to the 'en' edition of every multi-language site.
179 foreach ( $sites as $targetSite ) {
180 $sql .= $this->makeLink( array( $targetSite->lateral
, $targetSite->getURL( 'en' ), 1 ), $first, $db );
183 # Interlanguage links to wikipedia
184 $sql .= $this->makeLanguageLinks( $sites['wiki'], $first, $db );
187 foreach ( $extraLinks as $link ) {
188 $sql .= $this->makeLink( $link, $first, $db );
193 # Find out which site this DB belongs to
# A database belongs to the site whose suffix its name ends with.
# NOTE(review): $suffix is interpolated into the pattern without preg_quote();
# harmless for the suffixes listed above, which contain no regex metacharacters.
# Whether the loop stops at the first match is not visible in this excerpt.
195 foreach ( $sites as $candidateSite ) {
196 $suffix = $candidateSite->suffix
;
197 if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
198 $site = $candidateSite;
# NOTE(review): this message goes through print while the progress message at the
# top of the method uses $this->output().
203 print "Invalid database $db\n";
# $lang is presumably the part of the database name before the suffix (capture 1
# of the match above) -- its assignment is not visible.
207 $host = "$lang." . $site->url
;
208 $sql .= "\n--$host\n\n";
# Same statement header as in the special-case branch, apart from whitespace.
210 $sql .= "USE $db;\n" .
211 "TRUNCATE TABLE interwiki;\n" .
212 "INSERT INTO interwiki (iw_prefix,iw_url,iw_local) VALUES\n";
216 foreach ( $iwArray as $iwEntry ) {
217 # Suppress links with the same name as the site
# For suffix 'wiki' the prefix 'wikipedia' is skipped; for any other suffix the
# prefix equal to the suffix itself is skipped.
218 if ( ( $suffix == 'wiki' && $iwEntry['iw_prefix'] != 'wikipedia' ) ||
219 ( $suffix != 'wiki' && $suffix != $iwEntry['iw_prefix'] ) )
221 $sql .= $this->makeLink( $iwEntry, $first, $db );
# Lateral links to the same-language edition of every other multi-language site.
226 foreach ( $sites as $targetSite ) {
227 # Suppress link to self
228 if ( $targetSite->suffix
!= $site->suffix
) {
229 $sql .= $this->makeLink( array( $targetSite->lateral
, $targetSite->getURL( $lang ), 1 ), $first, $db );
233 # Interlanguage links
234 $sql .= $this->makeLanguageLinks( $site, $first, $db );
236 # w link within wikipedias
237 # Other sites already have it as a lateral link
238 if ( $site->suffix
== "wiki" ) {
# "$1" stays literal inside the double-quoted string because a PHP variable name
# cannot start with a digit.
239 $sql .= $this->makeLink( array( "w", "http://en.wikipedia.org/wiki/$1", 1 ), $first, $db );
243 foreach ( $extraLinks as $link ) {
244 $sql .= $this->makeLink( $link, $first, $db );
# Write the accumulated SQL for this database.
# NOTE(review): the return value of file_put_contents() is not checked.
248 file_put_contents( "$destDir/$db.sql", $sql );
252 # ------------------------------------------------------------------------------------------
254 # Returns part of an INSERT statement, corresponding to all interlanguage links to a particular site
# Build the value rows for every interlanguage link pointing at $site.
# $site:   object whose getURL( $lang ) supplies the target URL for each language
#          (taken by reference).
# $first:  by-reference flag handed straight on to makeLink().
# $source: name of the database the rows are generated for; passed on to makeLink().
# NOTE(review): the initialisation of $sql and the return statement are not visible
# in this excerpt; the existing comment above this function says an INSERT fragment
# is returned.
255 function makeLanguageLinks( &$site, &$first, $source ) {
258 # Actual languages with their own databases
# One row per entry of $this->langlist: prefix = language code, flag value 1.
259 foreach ( $this->langlist
as $targetLang ) {
260 $sql .= $this->makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $first, $source );
# One row per alias: the prefix is the alias, the URL is that of the language it maps to.
264 foreach ( $this->languageAliases
as $alias => $lang ) {
265 $sql .= $this->makeLink( array( $alias, $site->getURL( $lang ), 1 ), $first, $source );
270 # Make SQL for a single link from an array
# Turn one link entry into a parenthesised SQL value tuple.
# $entry:  array describing the link; callers in this file pass either positional
#          arrays ( prefix, url, 1 ) or arrays keyed iw_prefix / iw_url / iw_local.
# $first:  by-reference flag.
# $source: name of the database the row is generated for; selects the prefix
#          rewrites that apply.
# NOTE(review): several lines of this function (including whatever reads or updates
# $first, the initialisation of $sql and the return) are not visible in this excerpt.
271 function makeLink( $entry, &$first, $source ) {
# Apply a per-database prefix rewrite when one is configured for this prefix.
# The isset( $entry[0] ) guard means only positional entries are rewritten here;
# entries keyed by 'iw_prefix' have no index 0.
273 if ( isset( $this->prefixRewrites
[$source] ) && isset($entry[0]) && isset( $this->prefixRewrites
[$source][$entry[0]] ) ) {
274 $entry[0] = $this->prefixRewrites
[$source][$entry[0]];
# The database handle is used only for makeList(), which presumably quotes and
# comma-joins the values -- confirm against the database class.
284 $dbr = wfGetDB( DB_SLAVE
);
285 $sql .= "(" . $dbr->makeList( $entry ) . ")";
# Name the Maintenance subclass defined in this file, then load the file the
# DO_MAINTENANCE constant points at (presumably the runner that instantiates
# and executes that class -- the constant is defined outside this file).
$maintClass = 'RebuildInterwiki';
require_once DO_MAINTENANCE;