Throw exception in importDump instead of dumping a random backtrace and erroring
[mediawiki.git] / languages / ConverterRule.php
bloba5e960f97e8d62fa5df2949ea890bf74fd20af75
1 <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Language
 */
/**
 * Parser for rules of language conversion, parse rules in -{ }- tag.
 *
 * An instance holds the raw text of one -{ ... }- block together with the
 * flags, rule tables and display text that parse() derives from it.
 *
 * @ingroup Language
 * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com>
 */
class ConverterRule {
	// Original text in -{text}-.
	public $mText;
	// LanguageConverter object; read for its variants, flags and fallbacks.
	public $mConverter;
	// Text to display for this markup; filled in by parse().
	public $mRuleDisplay = '';
	// Converted title set by the T flag in parse(); false when none was set.
	public $mRuleTitle = false;
	// string : the text of the rules (the part after the flags).
	public $mRules = '';
	// 'none' by default; parse() sets 'add' for the + flag and 'remove' for the - flag.
	public $mRulesAction = 'none';
	// Parsed flags, stored as flag => true.
	public $mFlags = array();
	// Variant codes that were given as flags, stored as keys (see parseFlags()).
	public $mVariantFlags = array();
	// Conversion table built by generateConvTable(): variant => ( from => to ).
	public $mConvTable = array();
	// Bidirectional rules: array of the translation in each variant (variant => text).
	public $mBidtable = array();
	// Unidirectional rules: array of the translation in each variant (variant => ( from => to )).
	public $mUnidtable = array();
/**
 * Create a rule parser for the content of a single -{ ... }- markup block.
 *
 * Nothing is parsed here; the arguments are only stored for a later parse().
 *
 * @param string $text The text between -{ and }-
 * @param LanguageConverter $converter Converter this rule belongs to
 */
public function __construct( $text, $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
/**
 * Look up the bidirectional-table text for the first matching variant.
 *
 * @param array|string $variants Variant language code, or a list of codes
 *  that is searched in the given order
 * @return string|bool Text stored in the bidirectional table for the first
 *  listed variant that has an entry, or false when none of them has one
 */
public function getTextInBidtable( $variants ) {
	// An empty list simply falls through the loop to the "not found" result.
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}

	return false;
}
/**
 * Parse flags with syntax -{FLAG| ... }-
 *
 * Everything before the first '|' is read as a ';'-separated flag list and
 * everything after it becomes the rule text. Names that are not keys of the
 * converter's mFlags map are ignored. The accepted flags are then normalised
 * and the results are stored in mFlags, mVariantFlags and mRules.
 *
 * @private
 */
function parseFlags() {
	$rawText = $this->mText;
	$parsed = array();
	$variantFlags = array();

	$pipeAt = strpos( $rawText, '|' );
	if ( $pipeAt !== false ) {
		$known = $this->mConverter->mFlags;
		$names = StringUtils::explode( ';', substr( $rawText, 0, $pipeAt ) );
		foreach ( $names as $name ) {
			$name = trim( $name );
			if ( isset( $known[$name] ) ) {
				$parsed[$known[$name]] = true;
			}
		}
		$rawText = strval( substr( $rawText, $pipeAt + 1 ) );
	}

	// R, N and - each discard every other flag; R wins over N, N over -.
	$exclusive = null;
	foreach ( array( 'R', 'N', '-' ) as $flag ) {
		if ( isset( $parsed[$flag] ) ) {
			$exclusive = $flag;
			break;
		}
	}

	if ( !$parsed ) {
		// No recognised flag at all: behave as if S was given.
		$parsed['S'] = true;
	} elseif ( $exclusive !== null ) {
		$parsed = array( $exclusive => true );// remove other flags
	} elseif ( count( $parsed ) == 1 && isset( $parsed['T'] ) ) {
		// T on its own also gets H.
		$parsed['H'] = true;
	} elseif ( isset( $parsed['H'] ) ) {
		// replace A flag, and remove other flags except T and D
		$kept = array( '+' => true, 'H' => true );
		foreach ( array( 'T', 'D' ) as $flag ) {
			if ( isset( $parsed[$flag] ) ) {
				$kept[$flag] = true;
			}
		}
		$parsed = $kept;
	} else {
		if ( isset( $parsed['A'] ) ) {
			$parsed['+'] = true;
			$parsed['S'] = true;
		}
		if ( isset( $parsed['D'] ) ) {
			unset( $parsed['S'] );
		}
		// try to find flags like "zh-hans", "zh-hant"
		// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
		$variantFlags = array_intersect(
			array_keys( $parsed ),
			$this->mConverter->mVariants
		);
		if ( $variantFlags ) {
			// Variant flags replace all ordinary flags.
			$variantFlags = array_flip( $variantFlags );
			$parsed = array();
		}
	}

	$this->mFlags = $parsed;
	$this->mRules = $rawText;
	$this->mVariantFlags = $variantFlags;
}
/**
 * Generate conversion table.
 *
 * Splits mRules with the converter's variant separator pattern and sorts
 * each "variant:text" item into the bidirectional table and each
 * "from=>variant:text" item into the unidirectional table. Items without a
 * ':' are skipped. As soon as an item names a variant that has no entry in
 * the converter's mVariantNames, both tables are emptied and parsing stops.
 * The results are stored in mBidtable and mUnidtable.
 *
 * @private
 */
function parseRules() {
	$rules = $this->mRules;
	$bidtable = array();
	$unidtable = array();
	$variants = $this->mConverter->mVariants;
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split according to $varsep_pattern, but ignore semicolons from HTML entities
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choice = preg_split( $varsep_pattern, $rules );
	if ( $choice === false ) {
		// The separator pattern failed to run; treat the text as having no rules.
		$choice = array();
	}
	$choice = str_replace( "\x01", ';', $choice );

	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) != 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		$u = explode( '=>', $v, 2 );
		if ( count( $u ) == 1 ) {
			// Bidirectional rule. If $to is empty, strtr() could return a wrong
			// result. Compare against '' explicitly so that a target of "0",
			// which PHP treats as falsy, is still accepted.
			if ( $to !== '' && in_array( $v, $variants, true ) ) {
				$bidtable[$v] = $to;
			}
		} else {
			// Unidirectional rule "from=>variant:to".
			$from = trim( $u[0] );
			$v = trim( $u[1] );
			// An empty $from would become an empty-string key in the
			// conversion table, which strtr() cannot use.
			if ( $from !== '' && $to !== '' && in_array( $v, $variants, true ) ) {
				$unidtable[$v][$from] = $to;
			}
		}
		// syntax error, pass
		if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
			$bidtable = array();
			$unidtable = array();
			break;
		}
	}

	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
/**
 * Build a textual description of the parsed rules.
 *
 * Every bidirectional entry is rendered as variant name, code separator,
 * text, variant separator. Every unidirectional entry is rendered the same
 * way but prefixed with the source text and an arrow. Separators and variant
 * names are taken from the converter.
 *
 * @private
 *
 * @return string Concatenated description; empty when there are no rules
 */
function getRulesDesc() {
	$codeSep = $this->mConverter->mDescCodeSep;
	$varSep = $this->mConverter->mDescVarSep;
	$pieces = array();

	foreach ( $this->mBidtable as $variant => $text ) {
		$pieces[] = $this->mConverter->mVariantNames[$variant]
			. $codeSep . $text . $varSep;
	}

	foreach ( $this->mUnidtable as $variant => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$pieces[] = $from . '⇒' . $this->mConverter->mVariantNames[$variant]
				. $codeSep . $to . $varSep;
		}
	}

	return implode( '', $pieces );
}
/**
 * Parse rules conversion.
 *
 * Picks the text to show for $variant, trying in order: the variant's own
 * bidirectional entry, the bidirectional entry of one of its fallbacks, the
 * first unidirectional target for the variant, and, only when manual
 * conversion is 'disable' for the variant, the first text found in any table.
 * When both tables are empty the raw rule text is returned unchanged.
 *
 * A lookup only counts as missing when it yields false or an empty string,
 * so a text of "0" is displayed instead of being skipped as falsy.
 *
 * @private
 *
 * @param string $variant
 *
 * @return string|bool Text to display, or false when nothing matched
 */
function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
		return $this->mRules;
	}

	// display current variant in bidirectional array
	$disp = $this->getTextInBidtable( $variant );

	// or display current variant in fallbacks
	if ( $disp === false || $disp === '' ) {
		$disp = $this->getTextInBidtable(
			$this->mConverter->getVariantFallbacks( $variant ) );
	}

	// or display current variant in unidirectional array
	if ( ( $disp === false || $disp === '' ) && array_key_exists( $variant, $unidtable ) ) {
		$disp = array_values( $unidtable[$variant] );
		$disp = $disp[0];
	}

	// or display first text under disable manual convert
	$manualLevel = $this->mConverter->mManualLevel;
	if ( ( $disp === false || $disp === '' )
		&& isset( $manualLevel[$variant] )
		&& $manualLevel[$variant] == 'disable'
	) {
		if ( count( $bidtable ) > 0 ) {
			$disp = array_values( $bidtable );
			$disp = $disp[0];
		} else {
			// $bidtable is empty, so $unidtable has at least one entry here.
			$disp = array_values( $unidtable );
			$disp = array_values( $disp[0] );
			$disp = $disp[0];
		}
	}

	return $disp;
}
/**
 * Similar to getRuleConvertedStr(), but this prefers to use original
 * page title if $variant === $this->mConverter->mMainLanguageCode
 * and may return false in this case (so this title conversion rule
 * will be ignored and the original title is shown).
 *
 * For the main language code, a bidirectional entry for exactly this variant
 * wins, then the first unidirectional target for it. A bidirectional text of
 * "0" counts as set, so a unidirectional entry no longer overrides it.
 *
 * @since 1.22
 * @param string $variant The variant code to display page title in
 * @return string|bool The converted title or false if just page name
 */
function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->mMainLanguageCode ) {
		return $this->getRuleConvertedStr( $variant );
	}

	// If a string targeting exactly this variant is set,
	// use it. Otherwise, just return false, so the real
	// page name can be shown (and because variant === main,
	// there'll be no further automatic conversion).
	$disp = $this->getTextInBidtable( $variant );
	if ( $disp !== false && $disp !== '' ) {
		return $disp;
	}

	if ( array_key_exists( $variant, $this->mUnidtable ) ) {
		$disp = array_values( $this->mUnidtable[$variant] );
		$disp = $disp[0];
	}

	// Assigned above, or still the result of the bidirectional lookup.
	return $disp;
}
/**
 * Generate conversion table for all text.
 *
 * Walks the converter's variants in order. Variants without a bidirectional
 * entry borrow the text of a fallback variant when one has an entry. Each
 * pair of variants that both have text then gets a mapping between the two
 * texts, for whichever side has manual level 'bidirectional'. Finally the
 * variant's unidirectional rules are added when its manual level is
 * 'bidirectional' or 'unidirectional'. The result is stored in mConvTable;
 * when there are no rules at all, mConvTable is reset to an empty array.
 *
 * @private
 */
function generateConvTable() {
	// Special case optimisation
	if ( !$this->mBidtable && !$this->mUnidtable ) {
		$this->mConvTable = array();
		return;
	}

	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->mManualLevel;

	$vmarked = array();
	foreach ( $this->mConverter->mVariants as $v ) {
		/* for bidirectional array
			fill in the missing variants, if any,
			with fallbacks */
		if ( !isset( $bidtable[$v] ) ) {
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $v );
			$vf = $this->getTextInBidtable( $variantFallbacks );
			// Only false / '' mean "no fallback text"; a text of "0" is valid.
			if ( $vf !== false && $vf !== '' ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( isset( $bidtable[$v] ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}-
				// or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] == 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] == 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}

		/* for unidirectional array fill to convert tables */
		if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
			&& isset( $unidtable[$v] )
		) {
			if ( isset( $this->mConvTable[$v] ) ) {
				// array_merge() would renumber integer keys, and PHP stores
				// numeric source strings such as "2" as integer keys, so the
				// source text would be lost. array_replace() keeps every key
				// and still lets the unidirectional rule win on a clash.
				$this->mConvTable[$v] = array_replace( $this->mConvTable[$v], $unidtable[$v] );
			} else {
				$this->mConvTable[$v] = $unidtable[$v];
			}
		}
	}
}
/**
 * Parse rules and flags.
 *
 * Runs parseFlags(), optionally auto-converts the rule text when variant
 * codes were used as flags, runs parseRules() unless the R or N flag is
 * active, then derives the display text, title and rules action from the
 * flags and finally builds the conversion table.
 *
 * @param string $variant Variant language code; when empty, the converter's
 *  preferred variant is used
 */
public function parse( $variant = null ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$this->parseFlags();
	$flags = $this->mFlags;

	// convert to specified variant
	// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( $this->mVariantFlags ) {
		$haveTarget = false;
		$target = null;

		if ( isset( $this->mVariantFlags[$variant] ) ) {
			// the current variant is listed: convert the text to it
			$haveTarget = true;
			$target = $variant;
		} else {
			// otherwise use the first fallback variant that is listed
			$fallbacks = $this->mConverter->getVariantFallbacks( $variant );
			if ( is_array( $fallbacks ) ) {
				foreach ( $fallbacks as $fallback ) {
					if ( isset( $this->mVariantFlags[$fallback] ) ) {
						$haveTarget = true;
						$target = $fallback;
						break;
					}
				}
			}
		}

		if ( $haveTarget ) {
			$this->mRules = $this->mConverter->autoConvert( $this->mRules, $target );
		}

		// Either way the text is shown as-is from here on.
		$flags = array( 'R' => true );
		$this->mFlags = $flags;
	}

	$rawOnly = isset( $flags['R'] ) || isset( $flags['N'] );
	if ( !$rawOnly ) {
		// decode => HTML entities modified by Sanitizer::removeHTMLtags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	$noTables = !$this->mBidtable && !$this->mUnidtable;
	if ( $noTables ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// fill all variants if text in -{A/H/-|text} without rules
			foreach ( $this->mConverter->mVariants as $code ) {
				$this->mBidtable[$code] = $rules;
			}
		} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
			// No usable rules: fall back to showing the raw text.
			$flags = array( 'R' => true );
			$this->mFlags = $flags;
		}
	}

	// false means "no flag produced any display text"; checked after the loop.
	$this->mRuleDisplay = false;
	foreach ( array_keys( $flags ) as $flag ) {
		switch ( $flag ) {
			case 'R':
				// if we don't do content convert, still strip the -{}- tags
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// process N flag: output current variant name
				$code = trim( $rules );
				$this->mRuleDisplay = isset( $this->mConverter->mVariantNames[$code] )
					? $this->mConverter->mVariantNames[$code]
					: '';
				break;
			case 'D':
				// process D flag: output rules description
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// process H,- flag or T only: output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error case below)
		}
	}

	if ( $this->mRuleDisplay === false ) {
		$errorText = wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped();
		$this->mRuleDisplay = '<span class="error">' . $errorText . '</span>';
	}

	$this->generateConvTable();
}
/**
 * Placeholder that has not been implemented yet.
 *
 * @todo FIXME: code this function :)
 * @return null Always null, because the method has no real body
 */
public function hasRules() {
	// TODO:
	return null;
}
/**
 * Get the text to display in place of the -{...}- markup.
 *
 * @return string Value of mRuleDisplay; an empty string until parse() has
 *  filled it in
 */
public function getDisplay() {
	return $this->mRuleDisplay;
}
/**
 * Get the converted title.
 *
 * @return string|bool Value of mRuleTitle; false unless a title was set
 *  while parsing
 */
public function getTitle() {
	return $this->mRuleTitle;
}
/**
 * Return how the conversion rules should be dealt with.
 *
 * @return string Value of mRulesAction: 'none' by default, or 'add' /
 *  'remove' once parse() has seen the + / - flag
 */
public function getRulesAction() {
	return $this->mRulesAction;
}
/**
 * Get the conversion table (built from both the bidirectional and the
 * unidirectional rules).
 *
 * @return array Value of mConvTable
 */
public function getConvTable() {
	return $this->mConvTable;
}
/**
 * Get the conversion rules string.
 *
 * @return string Value of mRules, i.e. the rule text without the flag part
 */
public function getRules() {
	return $this->mRules;
}
/**
 * Get the conversion flags.
 *
 * @return array Value of mFlags, keyed by flag
 */
public function getFlags() {
	return $this->mFlags;
}