new namespace names
[mediawiki.git] / languages / LanguageZh.php
blobbac6d1eabad331c05eda17a378153a9dc9dbb481
1 <?php
2 require_once( "includes/ZhClient.php" );
3 require_once( "LanguageZh_cn.php");
4 require_once( "LanguageZh_tw.php");
5 require_once( "LanguageZh_sg.php");
6 require_once( "LanguageZh_hk.php");
8 /* Class that handles both Traditional and Simplified Chinese.
9 Right now it only distinguishes zh_cn and zh_tw (actually, zh_cn and
10 non-zh_cn); support for zh_sg, zh_hk, etc. will be added later.
12 class LanguageZh extends LanguageZh_cn {
14 var $mZhLanguageCode=false;
15 var $mZhClient=false;
/**
 * Constructor: selects the conversion client stored in $this->mZhClient.
 *
 * When $wgUseZhdaemon is set, a ZhClient is created for
 * $wgZhdaemonHost / $wgZhdaemonPort and kept only if it reports
 * isconnected(). In every other case a ZhClientFake is used.
 */
function LanguageZh() {
	global $wgUseZhdaemon, $wgZhdaemonHost, $wgZhdaemonPort;

	if ( $wgUseZhdaemon ) {
		$client = new ZhClient( $wgZhdaemonHost, $wgZhdaemonPort );
		// only keep the daemon client when the connection is usable
		if ( $client->isconnected() ) {
			$this->mZhClient = $client;
		}
	}

	// fallback to fake client
	if ( !$this->mZhClient ) {
		$this->mZhClient = new ZhClientFake();
	}
}
/**
 * Get the preferred language variant.
 *
 * The result is remembered in $this->mZhLanguageCode, so the lookup
 * below runs at most once per object. Sources, in order:
 *  1. the 'variant' request parameter, when it is one of getVariants();
 *  2. the 'variant' option of a logged-in user;
 *  3. the first "zh-xx" code found in the Accept-Language header,
 *     when it is one of getVariants();
 *  4. plain "zh".
 *
 * @return string variant code
 */
function getPreferredVariant() {
	global $wgUser, $wgRequest;

	if ( $this->mZhLanguageCode )
		return $this->mZhLanguageCode;

	$variants = $this->getVariants();

	// see if the preference is set in the request
	$zhreq = $wgRequest->getText( 'variant' );
	if ( in_array( $zhreq, $variants ) ) {
		$this->mZhLanguageCode = $zhreq;
		return $zhreq;
	}

	// get language variant preference from logged in users
	if ( $wgUser->getID() != 0 ) {
		$this->mZhLanguageCode = $wgUser->getOption( 'variant' );
	}

	if ( !$this->mZhLanguageCode ) {
		// default, unless some known zh- variant is set in the http header
		$this->mZhLanguageCode = "zh";

		// the header is absent for command line runs and for some clients
		if ( isset( $_SERVER["HTTP_ACCEPT_LANGUAGE"] ) ) {
			$header = str_replace( '_', '-', strtolower( $_SERVER["HTTP_ACCEPT_LANGUAGE"] ) );
			$zh = strstr( $header, 'zh-' );
			if ( $zh !== false ) {
				// the 5-byte slice can be anything ("zh-ha" from "zh-hant",
				// "zh-;q", ...), so only accept codes we actually support
				$code = substr( $zh, 0, 5 );
				if ( in_array( $code, $variants ) ) {
					$this->mZhLanguageCode = $code;
				}
			}
		}
	}
	return $this->mZhLanguageCode;
}
/**
 * Prepare text for diffing; this should give much better diff info.
 *
 * A space is inserted in front of every byte sequence made of a lead
 * byte 0xC0-0xFF followed by any number of continuation bytes
 * 0x80-0xBF, i.e. in front of every multi-byte UTF-8 character.
 * unsegmentForDiff() removes those spaces again.
 *
 * A plain backreference is used instead of the /e (eval) modifier:
 * the replacement never needed code evaluation, and /e is not
 * available in current PHP versions.
 *
 * @param string $text
 * @return string
 */
function segmentForDiff( $text ) {
	return preg_replace(
		'/([\xc0-\xff][\x80-\xbf]*)/',
		' $1', $text );
}
/**
 * Remove the single space that directly precedes a multi-byte UTF-8
 * character (lead byte 0xC0-0xFF plus continuation bytes 0x80-0xBF),
 * which is the space segmentForDiff() puts there.
 *
 * A plain backreference is used instead of the /e (eval) modifier:
 * the replacement never needed code evaluation, and /e is not
 * available in current PHP versions.
 *
 * @param string $text
 * @return string
 */
function unsegmentForDiff( $text ) {
	return preg_replace(
		'/ ([\xc0-\xff][\x80-\xbf]*)/',
		'$1', $text );
}
/**
 * Run the automatic conversion of $text through the conversion client.
 *
 * @param string $text
 * @param mixed $toVariant target variant code; when empty/false the
 *        result of getPreferredVariant() is used
 * @return mixed $text itself when the target is 'zh', otherwise
 *         whatever $this->mZhClient->convert() returns
 */
function autoConvert($text, $toVariant=false) {
	$target = $toVariant ? $toVariant : $this->getPreferredVariant();

	// only non-'zh' targets are sent to the client
	if ( $target != 'zh' ) {
		$profileName = "zhautoConvert";
		wfProfileIn( $profileName );
		$text = $this->mZhClient->convert( $text, $target );
		wfProfileOut( $profileName );
	}
	return $text;
}
/**
 * Convert $text to all variants via the conversion client.
 *
 * When the client returns an empty/false result, the static
 * ZhClientFake::autoConvertToAllVariants() is used as a fallback.
 *
 * @param string $text
 * @return mixed result of the client, or of the fallback
 */
function autoConvertToAllVariants($text) {
	$profileName = "zhautoConvertToAll";
	wfProfileIn( $profileName );

	$converted = $this->mZhClient->convertToAllVariants( $text );
	if ( !$converted ) {
		// fall back...
		$converted = ZhClientFake::autoConvertToAllVariants( $text );
	}

	wfProfileOut( $profileName );
	return $converted;
}
/**
 * Convert text to the preferred variant of the language.
 *
 * The automatic conversion is done in autoConvert(). Here we parse
 * the text marked with -{}-, which specifies special conversions of
 * the text that can not be accomplished in autoConvert().
 *
 * Syntax of the markup:
 *   -{code1:text1;code2:text2;...}-   or
 *   -{text}-   in which case no conversion should take place for text
 *
 * For a multi-choice block the text for the preferred variant is
 * used, else the text for getVariantFallback() of that variant, else
 * the block content is emitted unchanged.
 *
 * Nothing is converted when $wgDisableLangConversion is set or when
 * the text starts with "#redirect" (case-insensitive). With $isTitle
 * set, the text is returned unchanged for redirect=no or action=edit
 * requests and is otherwise passed to convertTitle().
 *
 * @param string $text
 * @param bool $isTitle whether $text is a page title
 * @return string
 */
function convert( $text , $isTitle=false) {
	global $wgDisableLangConversion;

	if ( $wgDisableLangConversion )
		return $text;

	// no conversion if redirecting
	if ( strtolower( substr( $text, 0, 9 ) ) == "#redirect" ) {
		return $text;
	}

	if ( $isTitle ) {
		global $wgRequest;
		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		if ( $isredir == 'no' || $action == 'edit' ) {
			return $text;
		}
		return $this->convertTitle( $text );
	}

	$plang = $this->getPreferredVariant();
	$fallback = $this->getVariantFallback( $plang );

	$tarray = explode( "-{", $text );
	$tfirst = array_shift( $tarray );
	$text = $this->autoConvert( $tfirst );
	foreach ( $tarray as $txt ) {
		// Split at the first "}-" only: whatever follows it (including a
		// stray "}-") is ordinary text and must not be dropped.
		$marked = explode( "}-", $txt, 2 );

		$choice = explode( ";", $marked[0] );
		if ( !array_key_exists( 1, $choice ) ) {
			/* a single choice */
			$text .= $choice[0];
		} else {
			$choice1 = false;
			$choice2 = false;
			foreach ( $choice as $c ) {
				// Split at the first ":" only, so the content may itself
				// contain colons (URLs, times, ...).
				$v = explode( ":", $c, 2 );
				if ( !array_key_exists( 1, $v ) ) {
					//syntax error in the markup, give up
					break;
				}
				$code = trim( $v[0] );
				$content = trim( $v[1] );
				// strict comparison: an empty code must not match a
				// missing (false) fallback
				if ( $code === $plang ) {
					$choice1 = $content;
					break;
				}
				if ( $code === $fallback )
					$choice2 = $content;
			}
			// explicit tests rather than truthiness, so that the
			// content "0" is not thrown away
			if ( $choice1 !== false && $choice1 !== '' )
				$text .= $choice1;
			elseif ( $choice2 !== false && $choice2 !== '' )
				$text .= $choice2;
			else
				$text .= $marked[0];
		}
		if ( array_key_exists( 1, $marked ) )
			$text .= $this->autoConvert( $marked[1] );
	}

	return $text;
}
/**
 * Convert a title; only titles having more than one character are
 * converted, shorter ones are returned unchanged.
 *
 * The length is counted in UTF-8 characters. The encoding is passed
 * to mb_strlen() explicitly because the mbstring internal encoding
 * may be something other than UTF-8, in which case bytes would be
 * counted. Without mbstring, the UTF-8 lead bytes (everything that is
 * not a 0x80-0xBF continuation byte) are counted instead.
 *
 * @param string $text
 * @return mixed $text, or the result of autoConvert( $text )
 */
function convertTitle($text) {
	if ( function_exists( 'mb_strlen' ) ) {
		$len = mb_strlen( $text, 'UTF-8' );
	} else {
		$len = preg_match_all( '/[\x00-\x7f\xc0-\xff]/', $text, $leadBytes );
	}

	if ( $len > 1 )
		return $this->autoConvert( $text );
	return $text;
}
/**
 * List the variant codes known to this class.
 *
 * @return array list of variant code strings
 */
function getVariants() {
	$codes = array( 'zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk' );
	return $codes;
}
/**
 * Get the fallback variant for a variant code; convert() uses it as
 * second choice when the markup has no entry for the preferred one.
 *
 * @param mixed $v variant code
 * @return mixed fallback variant code, or false when $v matches none
 *         of the known codes
 */
function getVariantFallback($v) {
	$fallbacks = array(
		'zh'    => 'zh-cn',
		'zh-cn' => 'zh-sg',
		'zh-sg' => 'zh-cn',
		'zh-tw' => 'zh-hk',
		'zh-hk' => 'zh-tw',
	);

	// walk the table in order with a loose comparison, exactly like
	// the case labels of a switch statement
	foreach ( $fallbacks as $variant => $fallback ) {
		if ( $v == $variant ) {
			return $fallback;
		}
	}
	return false;
}
/**
 * Prepare a string for the search index: word segmentation through
 * ZhClient, conversion to zh-cn, then LanguageUtf8::stripForSearch().
 *
 * @param string $string
 * @return mixed result of LanguageUtf8::stripForSearch()
 */
function stripForSearch( $string ) {
	$profileName = "zhsegment";
	wfProfileIn( $profileName );

	$segmented = $this->mZhClient->segment( $string );

	// Always convert to zh-cn before indexing. It should be better to
	// use zh-cn for search, since conversion from Traditional to
	// Simplified is less ambiguous than the other way around.
	$simplified = $this->autoConvert( $segmented, 'zh-cn' );

	$stripped = LanguageUtf8::stripForSearch( $simplified );

	wfProfileOut( $profileName );
	return $stripped;
}
/**
 * Expand search terms to all their variants.
 *
 * The terms are joined with '|', passed through
 * autoConvertToAllVariants() in one call, and the joined results are
 * split on '|' again with duplicates removed.
 *
 * @param array $termsArray search terms
 * @return array unique terms; keys are those left by array_unique()
 */
function convertForSearchResult( $termsArray ) {
	$joined = implode( '|', $termsArray );
	$allVariants = $this->autoConvertToAllVariants( $joined );
	$pieces = explode( '|', implode( '|', $allVariants ) );
	return array_unique( $pieces );
}
/**
 * Look for an existing article under one of the variants of a link.
 *
 * On success both arguments are overwritten: $link with the variant
 * text and $nt with its Title object. Otherwise they are left alone.
 *
 * Nothing is done when $wgDisableLangConversion is set, when the
 * preferred variant is 'zh', or once the per-request budget kept in
 * the static $count is used up.
 *
 * Lookup results are remembered per variant text, positive ones
 * included, so a link that occurs several times resolves every time
 * instead of only on its first occurrence.
 *
 * @param string $link link text, replaced on success
 * @param mixed $nt replaced by the Title of the variant on success
 */
function findVariantLink( &$link, &$nt ) {
	static $count = 0; //used to limit this operation
	static $cache = array(); // variant text => Title, or false if no such article
	global $wgDisableLangConversion;

	// cheap checks first; the preferred variant is only needed after them
	if ( $wgDisableLangConversion || $count > 50 )
		return;
	if ( $this->getPreferredVariant() == 'zh' )
		return;

	$count++;
	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) //give up
		return;

	foreach ( $variants as $v ) {
		if ( !isset( $cache[$v] ) ) {
			$varnt = Title::newFromText( $v );
			$cache[$v] = ( $varnt && $varnt->getArticleID() > 0 ) ? $varnt : false;
		}
		if ( $cache[$v] ) {
			$nt = $cache[$v];
			$link = $v;
			break;
		}
	}
}
/**
 * Return '!' followed by the preferred variant code.
 *
 * NOTE(review): presumably appended to a cache key so that renderings
 * for different variants are kept apart -- confirm against the caller.
 *
 * @return string
 */
function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}