Merge "De-duplicate pages in replaceInternal"
[mediawiki.git] / resources / src / mediawiki.libs / CLDRPluralRuleParser.js
blob83c25245524347b8775c95756c71775e5fed9a63
1 /* This is CLDRPluralRuleParser v1.1, ported to MediaWiki ResourceLoader */
3 /**
4 * CLDRPluralRuleParser.js
5 * A parser engine for CLDR plural rules.
7 * Copyright 2012 GPLV3+, Santhosh Thottingal
9 * @version 0.1.0-alpha
10 * @source https://github.com/santhoshtr/CLDRPluralRuleParser
11 * @author Santhosh Thottingal <santhosh.thottingal@gmail.com>
12 * @author Timo Tijhof
13 * @author Amir Aharoni
16 ( function ( mw ) {
/**
 * Evaluates a plural rule in CLDR syntax for a number.
 *
 * The rule is parsed and evaluated in a single pass by a small set of
 * parser combinators that share one cursor (`pos`) into the rule string.
 * Any '@integer' / '@decimal' samples section is discarded before parsing.
 *
 * @param {string} rule Plural rule in CLDR syntax. An empty rule (after
 *  removing samples and surrounding whitespace) always evaluates to true.
 * @param {number|string} number The number to test. It is concatenated into
 *  a string to read its fraction digits, so pass a string such as '1.50'
 *  when trailing zeros must stay visible to the f, t, v and w operands.
 * @return {boolean} true if evaluation passed, false if evaluation failed
 *  (a rule that cannot be parsed also yields false).
 * @throws {Error} If the top-level parser yields null. NOTE(review): as
 *  written, condition() returns false rather than null on a parse failure,
 *  so this guard is defensive only.
 */
function pluralRuleParser(rule, number) {
	/*
	Syntax: see http://unicode.org/reports/tr35/#Language_Plural_Rules
	-----------------------------------------------------------------
	condition     = and_condition ('or' and_condition)*
		('@integer' samples)?
		('@decimal' samples)?
	and_condition = relation ('and' relation)*
	relation      = is_relation | in_relation | within_relation
	is_relation   = expr 'is' ('not')? value
	in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
	within_relation = expr ('not')? 'within' range_list
	expr          = operand (('mod' | '%') value)?
	operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
	range_list    = (range | value) (',' range_list)*
	value         = digit+
	digit         = 0|1|2|3|4|5|6|7|8|9
	range         = value'..'value
	samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
	sampleRange   = decimalValue '~' decimalValue
	decimalValue  = value ('.' value)?
	*/

	// We don't evaluate the samples section of the rule. Ignore it.
	rule = rule.split('@')[0].replace(/^\s*/, '').replace(/\s*$/, '');

	if (!rule.length) {
		// Empty rule or 'other' rule.
		return true;
	}

	// Indicates current position in the rule as we parse through it.
	// Shared among all parsing functions below.
	var pos = 0,
		operand,
		expression,
		relation,
		result,
		whitespace = makeRegexParser(/^\s+/),
		value = makeRegexParser(/^\d+/),
		_n_ = makeStringParser('n'),
		_i_ = makeStringParser('i'),
		_f_ = makeStringParser('f'),
		_t_ = makeStringParser('t'),
		_v_ = makeStringParser('v'),
		_w_ = makeStringParser('w'),
		_is_ = makeStringParser('is'),
		_isnot_ = makeStringParser('is not'),
		_isnot_sign_ = makeStringParser('!='),
		_equal_ = makeStringParser('='),
		_mod_ = makeStringParser('mod'),
		_percent_ = makeStringParser('%'),
		_not_ = makeStringParser('not'),
		_in_ = makeStringParser('in'),
		_within_ = makeStringParser('within'),
		_range_ = makeStringParser('..'),
		_comma_ = makeStringParser(','),
		_or_ = makeStringParser('or'),
		_and_ = makeStringParser('and');

	// Tracing hook; intentionally a no-op. Uncomment the body to trace parsing.
	function debug() {
		// console.log.apply(console, arguments);
	}

	debug('pluralRuleParser', rule, number);

	// Try parsers until one works, if none work return null.
	// Note that `false` is a successful result (a relation that evaluated to
	// false); only `null` means "did not parse".
	function choice(parserSyntax) {
		return function () {
			var idx, parsed;
			for (idx = 0; idx < parserSyntax.length; idx++) {
				parsed = parserSyntax[idx]();
				if (parsed !== null) {
					return parsed;
				}
			}
			return null;
		};
	}

	// Try several parserSyntax-es in a row.
	// All must succeed; otherwise, rewind the cursor and return null.
	// This is the only eager one.
	function sequence(parserSyntax) {
		var idx, parsed,
			originalPos = pos,
			collected = [];
		for (idx = 0; idx < parserSyntax.length; idx++) {
			parsed = parserSyntax[idx]();
			if (parsed === null) {
				pos = originalPos;
				return null;
			}
			collected.push(parsed);
		}
		return collected;
	}

	// Run the same parser over and over until it fails.
	// Must succeed a minimum of n times; otherwise, rewind and return null.
	function nOrMore(n, p) {
		return function () {
			var originalPos = pos,
				collected = [],
				parsed = p();
			while (parsed !== null) {
				collected.push(parsed);
				parsed = p();
			}
			if (collected.length < n) {
				pos = originalPos;
				return null;
			}
			return collected;
		};
	}

	// Helpers -- just make parserSyntax out of simpler JS builtin types.

	// Parser that consumes the literal string s at the cursor.
	function makeStringParser(s) {
		var len = s.length;
		return function () {
			if (rule.substr(pos, len) !== s) {
				return null;
			}
			pos += len;
			return s;
		};
	}

	// Parser that consumes whatever `regex` matches at the cursor.
	// The regexes used here are anchored with ^.
	function makeRegexParser(regex) {
		return function () {
			var matches = rule.substr(pos).match(regex);
			if (matches === null) {
				return null;
			}
			pos += matches[0].length;
			return matches[0];
		};
	}

	// Visible fraction digits of `number` as a string ('' when there are none).
	// Appending '.' guarantees split() always yields an element at index 1.
	function fractionDigits(stripTrailingZeros) {
		var digits = (number + '.').split('.')[1];
		// 0+ so that every trailing zero is removed, not only the last one.
		return stripTrailingZeros ? digits.replace(/0+$/, '') : digits;
	}

	// Build an operand parser: consume the operand symbol, then compute the
	// operand's numeric value from `number`.
	function makeOperandParser(symbolParser, label, compute) {
		return function () {
			var computed;
			if (symbolParser() === null) {
				debug(' -- failed ' + label, number);
				return null;
			}
			computed = compute();
			debug(' -- passed ' + label, computed);
			return computed;
		};
	}

	// operand = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
	// Every operand evaluates to a number, so the strict comparisons in
	// is() / isnot() behave the same for all of them.
	operand = choice([
		// n: absolute value of the source number (integer and decimals).
		makeOperandParser(_n_, 'n', function () {
			return Math.abs(parseFloat(number));
		}),
		// i: integer digits of n.
		makeOperandParser(_i_, 'i', function () {
			return Math.abs(parseInt(number, 10));
		}),
		// f: visible fractional digits in n, with trailing zeros.
		makeOperandParser(_f_, 'f', function () {
			return parseInt(fractionDigits(false), 10) || 0;
		}),
		// t: visible fractional digits in n, without trailing zeros.
		makeOperandParser(_t_, 't', function () {
			return parseInt(fractionDigits(true), 10) || 0;
		}),
		// v: number of visible fraction digits in n, with trailing zeros.
		makeOperandParser(_v_, 'v', function () {
			return fractionDigits(false).length;
		}),
		// w: number of visible fraction digits in n, without trailing zeros.
		makeOperandParser(_w_, 'w', function () {
			return fractionDigits(true).length;
		})
	]);

	// expr = operand (('mod' | '%') value)?
	expression = choice([mod, operand]);

	function mod() {
		var divisor,
			parts = sequence([operand, whitespace, choice([_mod_, _percent_]), whitespace, value]);
		if (parts === null) {
			debug(' -- failed mod');
			return null;
		}
		divisor = parseInt(parts[4], 10);
		debug(' -- passed ' + parts[0] + ' ' + parts[2] + ' ' + divisor);
		// Do not truncate the operand: 1.5 % 10 must stay 1.5, not become 1.
		return parts[0] % divisor;
	}

	function not() {
		var parts = sequence([whitespace, _not_]);
		if (parts === null) {
			debug(' -- failed not');
			return null;
		}
		return parts[1];
	}

	// is_relation = expr 'is' value
	function is() {
		var expected,
			parts = sequence([expression, whitespace, choice([_is_]), whitespace, value]);
		if (parts === null) {
			debug(' -- failed is');
			return null;
		}
		expected = parseInt(parts[4], 10);
		debug(' -- passed is : ' + parts[0] + ' == ' + expected);
		return parts[0] === expected;
	}

	// is_relation = expr 'is not' value (also accepts '!=')
	function isnot() {
		var expected,
			parts = sequence([expression, whitespace, choice([_isnot_, _isnot_sign_]), whitespace, value]);
		if (parts === null) {
			debug(' -- failed isnot');
			return null;
		}
		expected = parseInt(parts[4], 10);
		debug(' -- passed isnot: ' + parts[0] + ' != ' + expected);
		return parts[0] !== expected;
	}

	// True when x has no fractional part (false for NaN and Infinity).
	function isInteger(x) {
		return x % 1 === 0;
	}

	// True when x lies inside at least one inclusive {low, high} interval.
	function isInAnyInterval(x, intervals) {
		var idx;
		for (idx = 0; idx < intervals.length; idx++) {
			if (x >= intervals[idx].low && x <= intervals[idx].high) {
				return true;
			}
		}
		return false;
	}

	// in_relation = expr '!=' range_list
	function not_in() {
		var parts = sequence([expression, whitespace, _isnot_sign_, whitespace, rangeList]);
		if (parts === null) {
			debug(' -- failed not_in');
			return null;
		}
		debug(' -- passed not_in: ' + parts[0] + ' != ' + parts[4]);
		// 'in' semantics: only integers can be members of the list.
		return !(isInteger(parts[0]) && isInAnyInterval(parts[0], parts[4]));
	}

	// A lone value inside a range_list, represented as a one-point interval.
	function singleValue() {
		var digits = value(),
			point;
		if (digits === null) {
			return null;
		}
		point = parseInt(digits, 10);
		return { low: point, high: point };
	}

	// range_list = (range | value) (',' range_list)*
	// Returns an array of inclusive {low, high} intervals.
	function rangeList() {
		var intervals,
			// range must be tried before singleValue, otherwise '1..3' would
			// be consumed as the lone value '1'.
			parts = sequence([choice([range, singleValue]), nOrMore(0, rangeTail)]);
		if (parts === null) {
			debug(' -- failed rangeList');
			return null;
		}
		intervals = [parts[0]];
		// rangeTail recurses into rangeList, so a single tail entry already
		// holds every remaining interval.
		if (parts[1][0]) {
			intervals = intervals.concat(parts[1][0]);
		}
		return intervals;
	}

	// ',' range_list
	function rangeTail() {
		var parts = sequence([_comma_, rangeList]);
		if (parts === null) {
			debug(' -- failed rangeTail');
			return null;
		}
		return parts[1];
	}

	// range = value'..'value
	// A reversed range (low > high) yields an interval that matches nothing.
	function range() {
		var parts = sequence([value, _range_, value]);
		if (parts === null) {
			debug(' -- failed range');
			return null;
		}
		debug(' -- passed range');
		return { low: parseInt(parts[0], 10), high: parseInt(parts[2], 10) };
	}

	// in_relation = expr ('not')? ('in' | '=') range_list
	function _in() {
		var matched, negated,
			parts = sequence([expression, nOrMore(0, not), whitespace, choice([_in_, _equal_]), whitespace, rangeList]);
		if (parts === null) {
			debug(' -- failed _in ');
			return null;
		}
		debug(' -- passed _in:' + parts);
		// 'in' only includes integers in the specified ranges.
		matched = isInteger(parts[0]) && isInAnyInterval(parts[0], parts[5]);
		negated = parts[1][0] === 'not';
		return matched !== negated;
	}

	/*
	 * The difference between in and within is that in only includes integers in the specified range,
	 * while within includes all values.
	 */
	// within_relation = expr ('not')? 'within' range_list
	function within() {
		var matched, negated,
			parts = sequence([expression, nOrMore(0, not), whitespace, _within_, whitespace, rangeList]);
		if (parts === null) {
			debug(' -- failed within ');
			return null;
		}
		debug(' -- passed within');
		// Both endpoints are inclusive, and each interval is tested on its own
		// so gaps between listed ranges are not treated as members.
		matched = isInAnyInterval(parts[0], parts[5]);
		negated = parts[1][0] === 'not';
		return matched !== negated;
	}

	// relation = is_relation | in_relation | within_relation
	relation = choice([is, not_in, isnot, _in, within]);

	// and_condition = relation ('and' relation)*
	function and() {
		var idx,
			parts = sequence([relation, nOrMore(0, andTail)]);
		if (!parts) {
			debug(' -- failed and');
			return null;
		}
		if (!parts[0]) {
			return false;
		}
		for (idx = 0; idx < parts[1].length; idx++) {
			if (!parts[1][idx]) {
				return false;
			}
		}
		return true;
	}

	// ('and' relation)*
	function andTail() {
		var parts = sequence([whitespace, _and_, whitespace, relation]);
		if (parts === null) {
			debug(' -- failed andTail');
			return null;
		}
		debug(' -- passed andTail' + parts);
		return parts[3];
	}

	// ('or' and_condition)*
	function orTail() {
		var parts = sequence([whitespace, _or_, whitespace, and]);
		if (parts === null) {
			debug(' -- failed orTail');
			return null;
		}
		debug(' -- passed orTail: ' + parts[3]);
		return parts[3];
	}

	// condition = and_condition ('or' and_condition)*
	// Yields false (not null) when the rule cannot be parsed at all.
	function condition() {
		var idx,
			parts = sequence([and, nOrMore(0, orTail)]);
		if (!parts) {
			return false;
		}
		for (idx = 0; idx < parts[1].length; idx++) {
			if (parts[1][idx]) {
				return true;
			}
		}
		return parts[0];
	}

	result = condition();

	/*
	 * For success, the pos must have gotten to the end of the rule
	 * and returned a non-null.
	 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
	 */
	if (result === null) {
		throw new Error('Parse error at position ' + pos.toString() + ' for rule: ' + rule);
	}

	if (pos !== rule.length) {
		debug('Warning: Rule not parsed completely. Parser stopped at ' + rule.substr(0, pos) + ' for rule: ' + rule);
	}

	return result;
}
472 /* pluralRuleParser ends here */
473 mw.libs.pluralRuleParser = pluralRuleParser;
475 } )( mediaWiki );