3 * @author Niklas Laxström, Tim Starling
5 * @copyright Copyright © 2010-2012, Niklas Laxström
6 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
13 * Helper class for converting rules to reverse polish notation (RPN).
15 class CLDRPluralRuleConverter
{
24 * The current position
31 * The past-the-end position
42 public $operators = array();
49 public $operands = array();
52 * Precedence levels. Note that there's no need to worry about associativity
53 * for the level 4 operators, since they return boolean and don't accept
56 private static $precedence = array(
71 * A character list defining whitespace, for use in strspn() etc.
73 const WHITESPACE_CLASS
= " \t\r\n";
76 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
77 * negative numbers or decimal separators.
79 const NUMBER_CLASS
= '0123456789';
82 * A character list of symbolic operands.
84 const OPERAND_SYMBOLS
= 'nivwft';
87 * An anchored regular expression which matches a word at the current offset.
89 const WORD_REGEX
= '/[a-zA-Z@]+/A';
92 * Convert a rule to RPN. This is the only public entry point.
94 * @param string $rule The rule to convert
95 * @return string The RPN representation of the rule
97 public static function convert( $rule ) {
98 $parser = new self( $rule );
100 return $parser->doConvert();
104 * Private constructor.
106 protected function __construct( $rule ) {
109 $this->end
= strlen( $rule );
115 * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
117 protected function doConvert() {
118 $expectOperator = true;
120 // Iterate through all tokens, saving the operators and operands to a
121 // stack per Dijkstra's shunting yard algorithm.
122 /** @var CLDRPluralRuleConverterOperator $token */
123 while ( false !== ( $token = $this->nextToken() ) ) {
124 // In this grammar, there are only binary operators, so every valid
125 // rule string will alternate between operator and operand tokens.
126 $expectOperator = !$expectOperator;
128 if ( $token instanceof CLDRPluralRuleConverterExpression
) {
130 if ( $expectOperator ) {
131 $token->error( 'unexpected operand' );
133 $this->operands
[] = $token;
137 if ( !$expectOperator ) {
138 $token->error( 'unexpected operator' );
140 // Resolve higher precedence levels
141 $lastOp = end( $this->operators
);
142 while ( $lastOp && self
::$precedence[$token->name
] <= self
::$precedence[$lastOp->name
] ) {
143 $this->doOperation( $lastOp, $this->operands
);
144 array_pop( $this->operators
);
145 $lastOp = end( $this->operators
);
147 $this->operators
[] = $token;
151 // Finish off the stack
152 while ( $op = array_pop( $this->operators
) ) {
153 $this->doOperation( $op, $this->operands
);
156 // Make sure the result is sane. The first case is possible for an empty
157 // string input, the second should be unreachable.
158 if ( !count( $this->operands
) ) {
159 $this->error( 'condition expected' );
160 } elseif ( count( $this->operands
) > 1 ) {
161 $this->error( 'missing operator or too many operands' );
164 $value = $this->operands
[0];
165 if ( $value->type
!== 'boolean' ) {
166 $this->error( 'the result must have a boolean type' );
169 return $this->operands
[0]->rpn
;
173 * Fetch the next token from the input string.
175 * @return CLDRPluralRuleConverterFragment The next token
177 protected function nextToken() {
178 if ( $this->pos
>= $this->end
) {
183 $length = strspn( $this->rule
, self
::WHITESPACE_CLASS
, $this->pos
);
184 $this->pos +
= $length;
186 if ( $this->pos
>= $this->end
) {
191 $length = strspn( $this->rule
, self
::NUMBER_CLASS
, $this->pos
);
192 if ( $length !== 0 ) {
193 $token = $this->newNumber( substr( $this->rule
, $this->pos
, $length ), $this->pos
);
194 $this->pos +
= $length;
199 // Two-character operators
200 $op2 = substr( $this->rule
, $this->pos
, 2 );
201 if ( $op2 === '..' ||
$op2 === '!=' ) {
202 $token = $this->newOperator( $op2, $this->pos
, 2 );
208 // Single-character operators
209 $op1 = $this->rule
[$this->pos
];
210 if ( $op1 === ',' ||
$op1 === '=' ||
$op1 === '%' ) {
211 $token = $this->newOperator( $op1, $this->pos
, 1 );
218 if ( !preg_match( self
::WORD_REGEX
, $this->rule
, $m, 0, $this->pos
) ) {
219 $this->error( 'unexpected character "' . $this->rule
[$this->pos
] . '"' );
221 $word1 = strtolower( $m[0] );
223 $nextTokenPos = $this->pos +
strlen( $word1 );
224 if ( $word1 === 'not' ||
$word1 === 'is' ) {
225 // Look ahead one word
226 $nextTokenPos +
= strspn( $this->rule
, self
::WHITESPACE_CLASS
, $nextTokenPos );
227 if ( $nextTokenPos < $this->end
228 && preg_match( self
::WORD_REGEX
, $this->rule
, $m, 0, $nextTokenPos )
230 $word2 = strtolower( $m[0] );
231 $nextTokenPos +
= strlen( $word2 );
235 // Two-word operators like "is not" take precedence over single-word operators like "is"
236 if ( $word2 !== '' ) {
237 $bothWords = "{$word1}-{$word2}";
238 if ( isset( self
::$precedence[$bothWords] ) ) {
239 $token = $this->newOperator( $bothWords, $this->pos
, $nextTokenPos - $this->pos
);
240 $this->pos
= $nextTokenPos;
246 // Single-word operators
247 if ( isset( self
::$precedence[$word1] ) ) {
248 $token = $this->newOperator( $word1, $this->pos
, strlen( $word1 ) );
249 $this->pos +
= strlen( $word1 );
254 // The single-character operand symbols
255 if ( strpos( self
::OPERAND_SYMBOLS
, $word1 ) !== false ) {
256 $token = $this->newNumber( $word1, $this->pos
);
263 if ( $word1 === '@integer' ||
$word1 === '@decimal' ) {
264 // Samples are like comments, they have no effect on rule evaluation.
265 // They run from the first sample indicator to the end of the string.
266 $this->pos
= $this->end
;
271 $this->error( 'unrecognised word' );
275 * For the binary operator $op, pop its operands off the stack and push
276 * a fragment with rpn and type members describing the result of that
279 * @param CLDRPluralRuleConverterOperator $op
281 protected function doOperation( $op ) {
282 if ( count( $this->operands
) < 2 ) {
283 $op->error( 'missing operand' );
285 $right = array_pop( $this->operands
);
286 $left = array_pop( $this->operands
);
287 $result = $op->operate( $left, $right );
288 $this->operands
[] = $result;
292 * Create a numerical expression object
294 * @param string $text
296 * @return CLDRPluralRuleConverterExpression The numerical expression
298 protected function newNumber( $text, $pos ) {
299 return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, strlen( $text ) );
303 * Create a binary operator
305 * @param string $type
308 * @return CLDRPluralRuleConverterOperator The operator
310 protected function newOperator( $type, $pos, $length ) {
311 return new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
317 protected function error( $message ) {
318 throw new CLDRPluralRuleError( $message );