BUG 1653 - Removing hardcoded messages in Special:Allmessages
[mediawiki.git] / languages / LanguageZh.php
blobbdefc3ac38c04b6be4e6d584997ff6288db044c4
1 <?php
2 require_once( "LanguageZh_cn.php");
3 require_once( "LanguageZh_tw.php");
4 require_once( "LanguageZh_sg.php");
5 require_once( "LanguageZh_hk.php");
/*
 hook to refresh the cache of conversion tables when
 MediaWiki:zhconversiontable* is updated

 The tables are reloaded only when the saved page is in the MediaWiki
 namespace, its DB key has the form Zhconversiontable/<variant>[/...]
 with <variant> one of zh-cn, zh-tw, zh-sg, zh-hk, and the content
 language object is a LanguageZh. The remaining hook arguments are unused.

 Always returns true.
*/
function zhOnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section) {
	$titleobj = $article->getTitle();
	if($titleobj->getNamespace() == NS_MEDIAWIKI) {
		global $wgContLang; // should be a LanguageZh.
		// get_class() yields a lowercased name on PHP 4 but the declared
		// case on PHP 5+, so normalise before comparing; otherwise the
		// check never matches and the tables are never refreshed.
		if(!is_object($wgContLang) || strtolower(get_class($wgContLang)) != 'languagezh')
			return true;

		$title = $titleobj->getDBkey();
		$t = explode('/', $title, 3);
		$c = count($t);
		if( $c > 1 && $t[0] == 'Zhconversiontable' ) {
			if(in_array($t[1], array('zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'))) {
				$wgContLang->reloadTables();
			}
		}
	}
	return true;
}

// register the hook so it runs after every article save
$wgHooks['ArticleSaveComplete'][] = 'zhOnArticleSaveComplete';
/* class that handles both Traditional and Simplified Chinese.
   right now it only distinguishes zh_cn and zh_tw (actually, zh_cn and
   non-zh_cn); support for zh_sg, zh_hk, etc. will be added later.
*/
36 class LanguageZh extends LanguageZh_cn {
38 var $mZhLanguageCode=false;
39 var $mTables=false; //the mapping tables
40 var $mTablesLoaded = false;
41 var $mCacheKey;
42 var $mDoTitleConvert = true, $mDoContentConvert = true;
// constructor: the cache key for the conversion tables is derived from
// the wiki's database name ($wgDBname)
function LanguageZh() {
	global $wgDBname;
	$this->mCacheKey = "{$wgDBname}:zhtables";
}
// a write lock
//
// Tries up to 30 times to add the "<cache key>lock" entry through $wgMemc,
// sleeping one second after every failed attempt. Returns the result of
// the last add() call: truthy once the lock entry was added, falsy if all
// attempts failed. The third add() argument (10) is presumably the
// lifetime of the lock entry in seconds -- confirm against the cache client.
function lockCache() {
	global $wgMemc;
	$lockKey = $this->mCacheKey . "lock";
	$acquired = false;
	$attempt = 0;
	while( $attempt < 30 ) {
		$acquired = $wgMemc->add( $lockKey, 1, 10 );
		if( $acquired ) {
			return $acquired;
		}
		sleep( 1 );
		$attempt++;
	}
	return $acquired;
}
// release the write lock taken by lockCache() by deleting the
// "<cache key>lock" entry
function unlockCache() {
	global $wgMemc;
	$lockKey = $this->mCacheKey . "lock";
	$wgMemc->delete( $lockKey );
}
// Merge extra mappings into the in-memory conversion table of one variant
// and, if the write lock can be taken, store the whole set of tables back
// in the cache.
//
// $code  - variant code whose table is extended (e.g. 'zh-cn')
// $table - array of source => target mappings; on string keys the new
//          entries override existing ones (array_merge semantics)
function updateTable($code, $table) {
	global $wgMemc;
	if(!$this->mTablesLoaded)
		$this->loadTables();

	// array_merge() needs an array on both sides; a variant code with no
	// table yet starts from an empty one instead of raising an error.
	if(!isset($this->mTables[$code]) || !is_array($this->mTables[$code]))
		$this->mTables[$code] = array();

	$this->mTables[$code] = array_merge($this->mTables[$code], $table);
	if($this->lockCache()) {
		$wgMemc->delete($this->mCacheKey);
		// 43200 = 12 * 60 * 60; presumably the cache lifetime in seconds
		$wgMemc->set($this->mCacheKey, $this->mTables, 43200);
		$this->unlockCache();
	}
}
// throw away the in-memory tables and rebuild them, bypassing the cache
// (loadTables() is called with $fromcache=false)
function reloadTables() {
	if( $this->mTables ) {
		unset( $this->mTables );
	}
	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
// load conversion tables either from the cache or the disk
//
// Does nothing when the tables are already marked as loaded. With
// $fromcache=true a non-empty cache entry is used as-is. Otherwise the
// base tables come from includes/ZhConversion.php, each variant is
// overlaid with the mappings parsed from MediaWiki:Zhconversiontable/*,
// and the result is written back to the cache if the lock can be taken.
function loadTables($fromcache=true) {
	global $wgMemc;

	if( $this->mTablesLoaded ) {
		return;
	}
	// flagged before the work starts, so a nested call returns immediately
	$this->mTablesLoaded = true;

	if( $fromcache ) {
		$this->mTables = $wgMemc->get( $this->mCacheKey );
		if( !empty( $this->mTables ) ) {
			// all done
			return;
		}
	}

	// not in cache, or we need a fresh reload.
	// NOTE(review): $wgMessageCache is not used directly here; the
	// declaration is kept because the required file runs in this scope.
	global $wgMessageCache;
	require( "includes/ZhConversion.php" );

	// base tables from file; zh-sg and zh-hk extend zh-cn and zh-tw
	$baseTables = array(
		'zh-cn' => $zh2CN,
		'zh-tw' => $zh2TW,
		'zh-sg' => array_merge( $zh2CN, $zh2SG ),
		'zh-hk' => array_merge( $zh2TW, $zh2HK )
	);
	$this->mTables = $baseTables;

	// then update them using things in MediaWiki:Zhconversiontable/*
	foreach( array( 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk' ) as $variant ) {
		$overrides = $this->parseCachedTable( $variant );
		$this->mTables[$variant] = array_merge( $this->mTables[$variant], $overrides );
	}

	if( $this->lockCache() ) {
		$wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
		$this->unlockCache();
	}
}
/*
 parse the conversion table stored in the cache

 the tables should be in blocks of the following form:

	-{
		word => word ;
		word => word ;
		...
	}-

 to make the tables more manageable, subpages are allowed
 and will be parsed recursively if $recursive=true

 $code      - variant code, e.g. 'zh-cn'
 $subpage   - subpage name below zhconversiontable/<code>, '' for the root
 $recursive - also parse subpages linked from the page

 returns an array of source => target mappings; an empty array when the
 message cache is unavailable or the page was already visited during the
 current top-level parse
*/
function parseCachedTable($code, $subpage='', $recursive=true) {
	global $wgMessageCache;
	static $parsed = array();
	static $depth = 0;

	if(!is_object($wgMessageCache))
		return array();

	// The visited list only exists to stop link cycles between subpages.
	// Reset it on every top-level call; otherwise a second parse in the
	// same request (e.g. reloadTables() from the save hook) would find
	// every page "already parsed" and return empty tables.
	if($depth == 0)
		$parsed = array();

	$key = 'zhconversiontable/'.$code;
	if($subpage)
		$key .= '/' . $subpage;

	if(array_key_exists($key, $parsed))
		return array();

	$txt = $wgMessageCache->get( $key, true, true, true );

	// get all subpage links of the form
	// [[MediaWiki:Zhconversiontable/zh-xx/...|...]]
	$linkhead = $this->getNsText(NS_MEDIAWIKI) . ':Zhconversiontable';
	$subs = explode('[[', $txt);
	$sublinks = array();
	foreach( $subs as $sub ) {
		$link = explode(']]', $sub, 2);
		if(count($link) != 2)
			continue;
		$b = explode('|', $link[0]);
		$b = explode('/', trim($b[0]), 3);
		// a link without a variant segment cannot be one of our subpages
		if(count($b) < 2)
			continue;
		if(count($b)==3)
			$sublink = $b[2];
		else
			$sublink = '';

		if($b[0] == $linkhead && $b[1] == $code) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = explode('-{', $txt);
	array_shift($blocks);
	$ret = array();
	foreach($blocks as $block) {
		$mappings = explode('}-', $block, 2);
		$stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]);
		$table = explode( ';', $stripped );
		foreach( $table as $t ) {
			$m = explode( '=>', $t );
			if( count( $m ) != 2)
				continue;
			$from = trim($m[0]);
			// an empty source word would become an empty-string key, which
			// makes strtr() return false for the whole text on PHP < 8
			if( $from === '' )
				continue;
			// trim any trailing comments starting with '//'
			$tt = explode('//', $m[1], 2);
			$ret[$from] = trim($tt[0]);
		}
	}
	$parsed[$key] = true;

	// recursively parse the subpages
	if($recursive) {
		foreach($sublinks as $link) {
			$depth++;
			$s = $this->parseCachedTable($code, $link, $recursive);
			$depth--;
			$ret = array_merge($ret, $s);
		}
	}
	return $ret;
}
/*
 get preferred language variants.

 The result is remembered in $this->mZhLanguageCode. Sources, in order:
   1. the 'variant' request parameter, if it names a known variant
   2. the 'variant' option of a logged-in user
   3. the first zh-xx code in the Accept-Language header, if it is a
      known variant
   4. 'zh'
*/
function getPreferredVariant() {
	global $wgUser, $wgRequest;

	if($this->mZhLanguageCode)
		return $this->mZhLanguageCode;

	// see if the preference is set in the request
	$zhreq = $wgRequest->getText( 'variant' );
	if( in_array( $zhreq, $this->getVariants() ) ) {
		$this->mZhLanguageCode = $zhreq;
		return $zhreq;
	}

	// get language variant preference from logged in users
	if( $wgUser->isLoggedIn() ) {
		$this->mZhLanguageCode = $wgUser->getOption('variant');
	}

	if( !$this->mZhLanguageCode ) {
		// see if some zh- variant is set in the http header,
		$this->mZhLanguageCode="zh";
		if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) {
			$header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"]));
			$zh = strstr($header, 'zh-');
			if($zh) {
				// The header is client-controlled: accept the 5-byte code
				// only if it is a variant we know, so arbitrary bytes never
				// reach the variant code (which is also used in
				// getExtraHashOptions()).
				$candidate = substr($zh,0,5);
				if( in_array( $candidate, $this->getVariants() ) ) {
					$this->mZhLanguageCode = $candidate;
				}
			}
		}
	}
	return $this->mZhLanguageCode;
}
# this should give much better diff info
#
# Inserts a space before every multi-byte UTF-8 sequence (a lead byte
# 0xC0-0xFF followed by continuation bytes 0x80-0xBF).
# Uses a plain replacement string instead of the eval-based /e modifier,
# which is no longer available from PHP 7 on; the output is the same.
function segmentForDiff( $text ) {
	return preg_replace(
		'/([\xc0-\xff][\x80-\xbf]*)/',
		' $1', $text);
}
# Inverse of segmentForDiff(): removes the single space that directly
# precedes a multi-byte UTF-8 sequence.
# Uses a plain replacement string instead of the eval-based /e modifier,
# which is no longer available from PHP 7 on; the output is the same.
function unsegmentForDiff( $text ) {
	return preg_replace(
		'/ ([\xc0-\xff][\x80-\xbf]*)/',
		'$1', $text);
}
// Convert $text to one variant with that variant's mapping table.
//
// $toVariant - target variant code; when empty/false the preferred variant
//              from getPreferredVariant() is used
// Text is returned unchanged for any code other than zh-cn, zh-tw, zh-sg
// and zh-hk (this includes plain 'zh').
function autoConvert($text, $toVariant=false) {
	$fname="LanguageZh::autoConvert";
	wfProfileIn( $fname );

	if( !$this->mTablesLoaded ) {
		$this->loadTables();
	}
	if( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
	}

	// default: no conversion
	$converted = $text;
	foreach( array( 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk' ) as $variantCode ) {
		if( $toVariant == $variantCode ) {
			$converted = strtr( $text, $this->mTables[$variantCode] );
			break;
		}
	}

	wfProfileOut( $fname );
	return $converted;
}
// Convert $text to every convertible variant.
//
// Returns an array keyed, in this order, by 'zh-cn', 'zh-tw', 'zh-sg' and
// 'zh-hk'. The zh-sg result is the zh-cn result run through the zh-sg
// table, and the zh-hk result is the zh-tw result run through the zh-hk
// table.
function autoConvertToAllVariants($text) {
	$fname="LanguageZh::autoConvertToAllVariants";
	wfProfileIn( $fname );

	if( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	$asCn = strtr( $text, $this->mTables['zh-cn'] );
	$asTw = strtr( $text, $this->mTables['zh-tw'] );

	$ret = array(
		'zh-cn' => $asCn,
		'zh-tw' => $asTw,
		'zh-sg' => strtr( $asCn, $this->mTables['zh-sg'] ),
		'zh-hk' => strtr( $asTw, $this->mTables['zh-hk'] )
	);

	wfProfileOut( $fname );
	return $ret;
}
# convert text to different variants of a language. the automatic
# conversion is done in autoConvert(). here we parse the text
# marked with -{}-, which specifies special conversions of the
# text that can not be accomplished in autoConvert()
#
# syntax of the markup:
# -{code1:text1;code2:text2;...}- or
# -{text}- in which case no conversion should take place for text
#
# $text    - text to convert
# $isTitle - true when $text is a page title; titles are converted as a
#            whole by autoConvert() and the -{}- markup is not parsed
# returns the converted text, or $text unchanged when conversion is
# disabled or switched off by a magic word, or the text starts with a
# redirect
function convert( $text , $isTitle=false) {
	global $wgDisableLangConversion;
	if($wgDisableLangConversion)
		return $text;

	// the two magic words below are removed from $text and switch off
	// conversion on this object from then on
	$mw =& MagicWord::get( MAG_NOTITLECONVERT );
	if( $mw->matchAndRemove( $text ) )
		$this->mDoTitleConvert = false;

	$mw =& MagicWord::get( MAG_NOCONTENTCONVERT );
	if( $mw->matchAndRemove( $text ) ) {
		$this->mDoContentConvert = false;
	}

	// no conversion if redirecting
	$mw =& MagicWord::get( MAG_REDIRECT );
	if( $mw->matchStart( $text ))
		return $text;

	if( $isTitle ) {
		if( !$this->mDoTitleConvert )
			return $text;

		// leave the title alone for redirect=no and action=edit requests
		global $wgRequest;
		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		if ( $isredir == 'no' || $action == 'edit' ) {
			return $text;
		}
		else {
			return $this->autoConvert($text);
		}
	}

	if( !$this->mDoContentConvert )
		return $text;

	$plang = $this->getPreferredVariant();
	$fallback = $this->getVariantFallback($plang);

	$tarray = explode("-{", $text);
	$tfirst = array_shift($tarray);
	$text = $this->autoConvert($tfirst);
	foreach($tarray as $txt) {
		// split at the first "}-" only: with an unlimited explode, the
		// text after a second "}-" in the same segment was silently lost
		$marked = explode("}-", $txt, 2);

		$choice = explode(";", $marked[0]);
		if(!array_key_exists(1, $choice)) {
			/* a single choice */
			$text .= $choice[0];
		} else {
			$choice1=false;
			$choice2=false;
			foreach($choice as $c) {
				// limit 2 so that text containing ':' is kept whole
				$v = explode(":", $c, 2);
				if(!array_key_exists(1, $v)) {
					//syntax error in the markup, give up
					break;
				}
				$code = trim($v[0]);
				$content = trim($v[1]);
				if($code == $plang) {
					$choice1 = $content;
					break;
				}
				// getVariantFallback() returns false when there is no
				// fallback; never compare a code against that
				if($fallback !== false && $code == $fallback)
					$choice2 = $content;
			}
			// compare explicitly so that a choice of "0" is not dropped;
			// an empty choice still falls through as before
			if ( $choice1 !== false && $choice1 !== '' )
				$text .= $choice1;
			elseif ( $choice2 !== false && $choice2 !== '' )
				$text .= $choice2;
			else
				$text .= $marked[0];
		}
		if(array_key_exists(1, $marked))
			$text .= $this->autoConvert($marked[1]);
	}

	return $text;
}
// list of the variant codes this class knows about; plain 'zh' comes first
function getVariants() {
	$variants = array("zh", "zh-cn", "zh-tw", "zh-sg", "zh-hk");
	return $variants;
}
// variant to fall back to when a -{}- block has no text for variant $v;
// returns false for a code that has no fallback
function getVariantFallback($v) {
	$fallbacks = array(
		'zh'    => 'zh-cn',
		'zh-cn' => 'zh-sg',
		'zh-sg' => 'zh-cn',
		'zh-tw' => 'zh-hk',
		'zh-hk' => 'zh-tw'
	);
	// loose comparison in declaration order, like a switch statement
	foreach( $fallbacks as $variant => $fallback ) {
		if( $v == $variant ) {
			return $fallback;
		}
	}
	return false;
}
// word segmentation
//
// Prepares $string for the search index: puts a space before every
// multi-byte UTF-8 sequence, converts the text to zh-cn and hands the
// result to LanguageUtf8::stripForSearch().
function stripForSearch( $string ) {
	$fname="LanguageZh::stripForSearch";
	wfProfileIn( $fname );

	// eventually this should be a word segmentation
	// for now just treat each character as a word
	// (plain replacement string instead of the eval-based /e modifier,
	// which is no longer available from PHP 7 on; same output)
	$t = preg_replace(
		'/([\xc0-\xff][\x80-\xbf]*)/',
		' $1', $string);

	//always convert to zh-cn before indexing. it should be
	//better to use zh-cn for search, since conversion from
	//Traditional to Simplified is less ambiguous than the
	//other way around

	$t = $this->autoConvert($t, 'zh-cn');
	$t = LanguageUtf8::stripForSearch( $t );
	wfProfileOut( $fname );
	return $t;
}
// Expand a list of search terms to all variants: the terms are joined
// with '|', converted to every variant in one pass, split again and
// de-duplicated (array_unique keeps the original keys).
function convertForSearchResult( $termsArray ) {
	$joined = implode( '|', $termsArray );
	$perVariant = $this->autoConvertToAllVariants( $joined );
	$allTerms = explode( '|', implode( '|', $perVariant ) );
	return array_unique( $allTerms );
}
// Look for an existing page under one of the variant spellings of $link.
//
// $link - link text, by reference; replaced by the matching variant text
//         unless conversion is disabled or the preferred variant is 'zh'
// $nt   - by reference; set to the Title of the first variant that has an
//         article ID greater than 0
// The lookup stops doing any work once the per-request call counter
// exceeds 50, and each variant text is looked up at most once per request.
function findVariantLink( &$link, &$nt ) {
	static $calls = 0; //used to limit this operation
	static $seen = array();
	global $wgDisableLangConversion;

	$preferred = $this->getPreferredVariant();
	if( $calls > 50 ) {
		return;
	}
	$calls++;

	$candidates = $this->autoConvertToAllVariants( $link );
	if( !$candidates ) {
		//give up
		return;
	}

	foreach( $candidates as $candidate ) {
		if( !isset( $seen[$candidate] ) ) {
			$seen[$candidate] = 1;
			$title = Title::newFromText( $candidate );
			if( $title && $title->getArticleID() > 0 ) {
				$nt = $title;
				if( !$wgDisableLangConversion && $preferred != 'zh' ) {
					$link = $candidate;
				}
				break;
			}
		}
	}
}
// extra hash component: '!' followed by the preferred variant code
function getExtraHashOptions() {
	$preferred = $this->getPreferredVariant();
	return '!' . $preferred;
}