etc/services - sync with NetBSD-8
[minix.git] / lib / libintl / plural_parser.c
blobb673e170505853be878b3382e0365ec53c03a3f7
1 /* $NetBSD: plural_parser.c,v 1.2 2007/01/17 23:24:22 hubertf Exp $ */
3 /*-
4 * Copyright (c) 2005 Citrus Project,
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
30 #include <sys/cdefs.h>
31 __RCSID("$NetBSD: plural_parser.c,v 1.2 2007/01/17 23:24:22 hubertf Exp $");
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <citrus/citrus_namespace.h>
38 #include <citrus/citrus_region.h>
39 #include <citrus/citrus_memstream.h>
40 #include <citrus/citrus_bcs.h>
41 #include "plural_parser.h"
/*
 * The stand-alone tokenizer and parser test programs accept empty input
 * and arbitrary identifiers.
 */
#if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
#define ALLOW_EMPTY
#define ALLOW_ARBITRARY_IDENTIFIER
#endif

/* longest identifier or constant, in characters, not counting the NUL */
#define MAX_LEN_ATOM		10
/* number of operand slots in an operator node ('?' uses all three) */
#define MAX_NUM_OPERANDS	3

/*
 * Token kinds.  Single-character tokens such as '?', ':', '(', ')' and
 * '!' are represented by their own character code; everything else gets
 * a value of 0x100 or above.
 */
#define T_EOF			EOF
#define T_NONE			0x100
#define T_LAND			0x101	/* && */
#define T_LOR			0x102	/* || */
#define T_EQUALITY		0x103	/* == or != */
#define T_RELATIONAL		0x104	/* <, >, <= or >= */
#define T_ADDITIVE		0x105	/* + or - */
#define T_MULTIPLICATIVE	0x106	/* *, / or % */
#define T_IDENTIFIER		0x200
#define T_CONSTANT		0x201
/* error codes */
#define T_ILCHAR		0x300
#define T_TOOLONG		0x301
#define T_ILTOKEN		0x302
#define T_ILEND			0x303
#define T_NOMEM			0x304
#define T_NOTFOUND		0x305
#define T_ILPLURAL		0x306
/* classification by numeric range */
#define T_IS_OPERATOR(t)	((t) < 0x200)
#define T_IS_ERROR(t)		((t) >= 0x300)

/* two-character operators are encoded as the sum of both characters */
#define OP_EQ			('='+'=')
#define OP_NEQ			('!'+'=')
#define OP_LTEQ			('<'+'=')
#define OP_GTEQ			('>'+'=')

/* keywords looked for in the catalog header */
#define PLURAL_NUMBER_SYMBOL	"n"
#define NPLURALS_SYMBOL		"nplurals"
#define LEN_NPLURAL_SYMBOL	(sizeof (NPLURALS_SYMBOL) -1)
#define PLURAL_SYMBOL		"plural"
#define LEN_PLURAL_SYMBOL	(sizeof (PLURAL_SYMBOL) -1)
#define PLURAL_FORMS		"Plural-Forms:"
#define LEN_PLURAL_FORMS	(sizeof (PLURAL_FORMS) -1)
/* ----------------------------------------------------------------------
 * tokenizer part
 */
/*
 * Value carried by a token.  Which member is valid depends on the token
 * kind the tokenizer returned alongside it.
 */
union token_data
{
	unsigned long constant;			/* T_CONSTANT: numeric value */
#ifdef ALLOW_ARBITRARY_IDENTIFIER
	char identifier[MAX_LEN_ATOM+1];	/* T_IDENTIFIER: its name */
#endif
	char op;				/* operator tokens: operator code */
};
97 struct tokenizer_context
99 struct _memstream memstream;
100 struct {
101 int token;
102 union token_data token_data;
103 } token0;
106 /* initialize a tokenizer context */
107 static void
108 init_tokenizer_context(struct tokenizer_context *tcx)
110 tcx->token0.token = T_NONE;
113 /* get an atom (identifier or constant) */
114 static int
115 tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
117 int ch, len;
118 char buf[MAX_LEN_ATOM+1];
120 len = 0;
121 while (/*CONSTCOND*/1) {
122 ch = _memstream_getc(&tcx->memstream);
123 if (!(_bcs_isalnum(ch) || ch == '_')) {
124 _memstream_ungetc(&tcx->memstream, ch);
125 break;
127 if (len == MAX_LEN_ATOM)
128 return T_TOOLONG;
129 buf[len++] = ch;
131 buf[len] = '\0';
132 if (len == 0)
133 return T_ILCHAR;
135 if (_bcs_isdigit((int)(unsigned char)buf[0])) {
136 unsigned long ul;
137 char *post;
138 ul = strtoul(buf, &post, 0);
139 if (buf+len != post)
140 return T_ILCHAR;
141 token_data->constant = ul;
142 return T_CONSTANT;
145 #ifdef ALLOW_ARBITRARY_IDENTIFIER
146 strcpy(token_data->identifier, buf);
147 return T_IDENTIFIER;
148 #else
149 if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
150 return T_IDENTIFIER;
151 return T_ILCHAR;
152 #endif
155 /* tokenizer main routine */
156 static int
157 tokenize(struct tokenizer_context *tcx, union token_data *token_data)
159 int ch, prevch;
161 retry:
162 ch = _memstream_getc(&tcx->memstream);
163 if (_bcs_isspace(ch))
164 goto retry;
166 switch (ch) {
167 case T_EOF:
168 return ch;
169 case '+': case '-':
170 token_data->op = ch;
171 return T_ADDITIVE;
172 case '*': case '/': case '%':
173 token_data->op = ch;
174 return T_MULTIPLICATIVE;
175 case '?': case ':': case '(': case ')':
176 token_data->op = ch;
177 return ch;
178 case '&': case '|':
179 prevch = ch;
180 ch = _memstream_getc(&tcx->memstream);
181 if (ch != prevch) {
182 _memstream_ungetc(&tcx->memstream, ch);
183 return T_ILCHAR;
185 token_data->op = ch;
186 switch (ch) {
187 case '&':
188 return T_LAND;
189 case '|':
190 return T_LOR;
192 /*NOTREACHED*/
193 case '=': case '!': case '<': case '>':
194 prevch = ch;
195 ch = _memstream_getc(&tcx->memstream);
196 if (ch != '=') {
197 _memstream_ungetc(&tcx->memstream, ch);
198 switch (prevch) {
199 case '=':
200 return T_ILCHAR;
201 case '!':
202 return '!';
203 case '<':
204 case '>':
205 token_data->op = prevch; /* OP_LT or OP_GT */
206 return T_RELATIONAL;
209 /* '==', '!=', '<=' or '>=' */
210 token_data->op = ch+prevch;
211 switch (prevch) {
212 case '=':
213 case '!':
214 return T_EQUALITY;
215 case '<':
216 case '>':
217 return T_RELATIONAL;
219 /*NOTREACHED*/
222 _memstream_ungetc(&tcx->memstream, ch);
223 return tokenize_atom(tcx, token_data);
226 /* get the next token */
227 static int
228 get_token(struct tokenizer_context *tcx, union token_data *token_data)
230 if (tcx->token0.token != T_NONE) {
231 int token = tcx->token0.token;
232 tcx->token0.token = T_NONE;
233 *token_data = tcx->token0.token_data;
234 return token;
236 return tokenize(tcx, token_data);
239 /* push back the last token */
240 static void
241 unget_token(struct tokenizer_context *tcx,
242 int token, union token_data *token_data)
244 tcx->token0.token = token;
245 tcx->token0.token_data = *token_data;
#ifdef TEST_TOKENIZER

/*
 * Tokenizer test driver: tokenize argv[1] and print one line per token
 * until end of input or the first tokenizer error.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	union token_data token_data;
	int token;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	while (1) {
		token = get_token(&tcx, &token_data);
		switch (token) {
		case T_EOF:
			goto quit;
		case T_ILCHAR:
			printf("illegal character.\n");
			goto quit;
		case T_TOOLONG:
			printf("too long atom.\n");
			goto quit;
		case T_CONSTANT:
			printf("constant: %lu\n", token_data.constant);
			break;
		case T_IDENTIFIER:
			printf("symbol: %s\n", token_data.identifier);
			break;
		default:
			printf("operator: ");
			switch (token) {
			case T_LAND:
				printf("&&\n");
				break;
			case T_LOR:
				printf("||\n");
				break;
			case T_EQUALITY:
				/* op is first char + '='; recover the first */
				printf("%c=\n", token_data.op-'=');
				break;
			case T_RELATIONAL:
				switch(token_data.op) {
				case OP_LTEQ:
				case OP_GTEQ:
					printf("%c=\n", token_data.op-'=');
					break;
				default:
					printf("%c\n", token_data.op);
					break;
				}
				break;
			case T_ADDITIVE:
			case T_MULTIPLICATIVE:
				printf("%c\n", token_data.op);
				break;
			default:
				/*
				 * Single-character token; the "operator: "
				 * label has already been printed above.
				 */
				printf("%c\n", token);
			}
		}
	}
quit:
	return 0;
}
#endif /* TEST_TOKENIZER */
/* ----------------------------------------------------------------------
 * parser part
 *
 * exp := cond
 *
 * cond := lor | lor '?' cond ':' cond
 *
 * lor := land ( '||' land )*
 *
 * land := equality ( '&&' equality )*
 *
 * equality := relational ( equalityops relational )*
 * equalityops := '==' | '!='
 *
 * relational := additive ( relationalops additive )*
 * relationalops := '<' | '>' | '<=' | '>='
 *
 * additive := multiplicative ( additiveops multiplicative )*
 * additiveops := '+' | '-'
 *
 * multiplicative := lnot ( multiplicativeops lnot )*
 * multiplicativeops := '*' | '/' | '%'
 *
 * lnot := '!' lnot | term
 *
 * term := literal | identifier | '(' exp ')'
 */
/* Jump to `label' if `token' is an error code. */
#define T_ENSURE_OK(token, label)					\
	do {								\
		if (T_IS_ERROR(token))					\
			goto label;					\
	} while (/*CONSTCOND*/0)

/*
 * Like T_ENSURE_OK, but end of input also counts as an error: `token'
 * (which must be an lvalue) is rewritten to T_ILEND before jumping.
 */
#define T_ENSURE_SOMETHING(token, label)				\
	do {								\
		if ((token) == T_EOF) {					\
			token = T_ILEND;				\
			goto label;					\
		} else if (T_IS_ERROR(token))				\
			goto label;					\
	} while (/*CONSTCOND*/0)
363 #define parser_element plural_element
365 struct parser_element;
366 struct parser_op
368 char op;
369 struct parser_element *operands[MAX_NUM_OPERANDS];
371 struct parser_element
373 int kind;
374 union
376 struct parser_op parser_op;
377 union token_data token_data;
378 } u;
/*
 * One precedence level of the binary operators: the token kind accepted
 * at this level and the level its operands are parsed with (NULL means
 * the operands are `lnot').
 */
struct parser_op2_transition
{
	int kind;
	const struct parser_op2_transition *next;
};
387 /* prototypes */
388 static int parse_cond(struct tokenizer_context *, struct parser_element *);
391 /* transition table for the 2-operand operators */
392 #define DEF_TR(t, k, n) \
393 static struct parser_op2_transition exp_tr_##t = { \
394 k, &exp_tr_##n \
396 #define DEF_TR0(t, k) \
397 static struct parser_op2_transition exp_tr_##t = { \
398 k, NULL /* expect lnot */ \
401 DEF_TR0(multiplicative, T_MULTIPLICATIVE);
402 DEF_TR(additive, T_ADDITIVE, multiplicative);
403 DEF_TR(relational, T_RELATIONAL, additive);
404 DEF_TR(equality, T_EQUALITY, relational);
405 DEF_TR(land, T_LAND, equality);
406 DEF_TR(lor, T_LOR, land);
408 /* init a parser element structure */
409 static void
410 init_parser_element(struct parser_element *pe)
412 int i;
414 pe->kind = T_NONE;
415 for (i=0; i<MAX_NUM_OPERANDS; i++)
416 pe->u.parser_op.operands[i] = NULL;
419 /* uninitialize a parser element structure with freeing children */
420 static void free_parser_element(struct parser_element *);
421 static void
422 uninit_parser_element(struct parser_element *pe)
424 int i;
426 if (T_IS_OPERATOR(pe->kind))
427 for (i=0; i<MAX_NUM_OPERANDS; i++)
428 if (pe->u.parser_op.operands[i])
429 free_parser_element(
430 pe->u.parser_op.operands[i]);
/*
 * Free a heap-allocated parser element together with all of its
 * children.  A NULL pointer is accepted and ignored.
 */
static void
free_parser_element(struct parser_element *pe)
{

	if (pe == NULL)
		return;
	uninit_parser_element(pe);
	free(pe);
}
444 /* copy a parser element structure shallowly */
445 static void
446 copy_parser_element(struct parser_element *dpe,
447 const struct parser_element *spe)
449 memcpy(dpe, spe, sizeof *dpe);
452 /* duplicate a parser element structure shallowly */
453 static struct parser_element *
454 dup_parser_element(const struct parser_element *pe)
456 struct parser_element *dpe = malloc(sizeof *dpe);
457 if (dpe)
458 copy_parser_element(dpe, pe);
459 return dpe;
462 /* term := identifier | constant | '(' exp ')' */
463 static int
464 parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
466 struct parser_element pe0;
467 int token;
468 union token_data token_data;
470 token = get_token(tcx, &token_data);
471 switch (token) {
472 case '(':
473 /* '(' exp ')' */
474 init_parser_element(&pe0);
475 /* expect exp */
476 token = parse_cond(tcx, &pe0);
477 T_ENSURE_OK(token, err);
478 /* expect ')' */
479 token = get_token(tcx, &token_data);
480 T_ENSURE_SOMETHING(token, err);
481 if (token != ')') {
482 unget_token(tcx, token, &token_data);
483 token = T_ILTOKEN;
484 goto err;
486 copy_parser_element(pelem, &pe0);
487 return token;
488 err:
489 uninit_parser_element(&pe0);
490 return token;
491 case T_IDENTIFIER:
492 case T_CONSTANT:
493 pelem->kind = token;
494 pelem->u.token_data = token_data;
495 return token;
496 case T_EOF:
497 return T_ILEND;
498 default:
499 return T_ILTOKEN;
503 /* lnot := '!' lnot | term */
504 static int
505 parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
507 struct parser_element pe0;
508 int token;
509 union token_data token_data;
511 init_parser_element(&pe0);
513 /* '!' or not */
514 token = get_token(tcx, &token_data);
515 if (token != '!') {
516 /* stop: term */
517 unget_token(tcx, token, &token_data);
518 return parse_term(tcx, pelem);
521 /* '!' term */
522 token = parse_lnot(tcx, &pe0);
523 T_ENSURE_OK(token, err);
525 pelem->kind = '!';
526 pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
527 return pelem->kind;
528 err:
529 uninit_parser_element(&pe0);
530 return token;
533 /* ext_op := ext_next ( op ext_next )* */
534 static int
535 parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
536 const struct parser_op2_transition *tr)
538 struct parser_element pe0, pe1, peop;
539 int token;
540 union token_data token_data;
541 char op;
543 /* special case: expect lnot */
544 if (tr == NULL)
545 return parse_lnot(tcx, pelem);
547 init_parser_element(&pe0);
548 init_parser_element(&pe1);
549 token = parse_op2(tcx, &pe0, tr->next);
550 T_ENSURE_OK(token, err);
552 while (/*CONSTCOND*/1) {
553 /* expect op or empty */
554 token = get_token(tcx, &token_data);
555 if (token != tr->kind) {
556 /* stop */
557 unget_token(tcx, token, &token_data);
558 copy_parser_element(pelem, &pe0);
559 break;
561 op = token_data.op;
562 /* right hand */
563 token = parse_op2(tcx, &pe1, tr->next);
564 T_ENSURE_OK(token, err);
566 init_parser_element(&peop);
567 peop.kind = tr->kind;
568 peop.u.parser_op.op = op;
569 peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
570 init_parser_element(&pe0);
571 peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
572 init_parser_element(&pe1);
573 copy_parser_element(&pe0, &peop);
575 return pelem->kind;
576 err:
577 uninit_parser_element(&pe1);
578 uninit_parser_element(&pe0);
579 return token;
582 /* cond := lor | lor '?' cond ':' cond */
583 static int
584 parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
586 struct parser_element pe0, pe1, pe2;
587 int token;
588 union token_data token_data;
590 init_parser_element(&pe0);
591 init_parser_element(&pe1);
592 init_parser_element(&pe2);
594 /* expect lor or empty */
595 token = parse_op2(tcx, &pe0, &exp_tr_lor);
596 T_ENSURE_OK(token, err);
598 /* '?' or not */
599 token = get_token(tcx, &token_data);
600 if (token != '?') {
601 /* stop: lor */
602 unget_token(tcx, token, &token_data);
603 copy_parser_element(pelem, &pe0);
604 return pe0.kind;
607 /* lor '?' cond ':' cond */
608 /* expect cond */
609 token = parse_cond(tcx, &pe1);
610 T_ENSURE_OK(token, err);
612 /* expect ':' */
613 token = get_token(tcx, &token_data);
614 T_ENSURE_OK(token, err);
615 if (token != ':') {
616 unget_token(tcx, token, &token_data);
617 token = T_ILTOKEN;
618 goto err;
621 /* expect cond */
622 token = parse_cond(tcx, &pe2);
623 T_ENSURE_OK(token, err);
625 pelem->kind = '?';
626 pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
627 pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
628 pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
629 return pelem->kind;
630 err:
631 uninit_parser_element(&pe2);
632 uninit_parser_element(&pe1);
633 uninit_parser_element(&pe0);
634 return token;
637 static int
638 parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
640 int token, token1;
641 union token_data token_data;
643 #ifdef ALLOW_EMPTY
644 /* empty check */
645 token = get_token(tcx, &token_data);
646 if (token == T_EOF)
647 return token;
648 unget_token(tcx, token, &token_data);
649 #endif
651 token = parse_cond(tcx, pelem);
652 if (!T_IS_ERROR(token)) {
653 /* termination check */
654 token1 = get_token(tcx, &token_data);
655 if (token1 == T_EOF)
656 return token;
657 else if (!T_IS_ERROR(token))
658 unget_token(tcx, token1, &token_data);
659 return T_ILTOKEN;
661 return token;
665 #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
666 #include <stdio.h>
668 static void dump_elem(struct parser_element *);
670 static void
671 dump_op2(struct parser_element *pelem)
673 dump_elem(pelem->u.parser_op.operands[0]);
674 printf(" ");
675 dump_elem(pelem->u.parser_op.operands[1]);
676 printf(")");
679 static void
680 dump_op3(struct parser_element *pelem)
682 dump_elem(pelem->u.parser_op.operands[0]);
683 printf(" ");
684 dump_elem(pelem->u.parser_op.operands[1]);
685 printf(" ");
686 dump_elem(pelem->u.parser_op.operands[2]);
687 printf(")");
690 static void
691 dump_elem(struct parser_element *pelem)
693 switch (pelem->kind) {
694 case T_LAND:
695 printf("(&& ");
696 dump_op2(pelem);
697 break;
698 case T_LOR:
699 printf("(|| ");
700 dump_op2(pelem);
701 break;
702 case T_EQUALITY:
703 switch (pelem->u.parser_op.op) {
704 case OP_EQ:
705 printf("(== ");
706 break;
707 case OP_NEQ:
708 printf("(!= ");
709 break;
711 dump_op2(pelem);
712 break;
713 case T_RELATIONAL:
714 switch (pelem->u.parser_op.op) {
715 case '<':
716 case '>':
717 printf("(%c ", pelem->u.parser_op.op);
718 break;
719 case OP_LTEQ:
720 case OP_GTEQ:
721 printf("(%c= ", pelem->u.parser_op.op-'=');
722 break;
724 dump_op2(pelem);
725 break;
726 case T_ADDITIVE:
727 case T_MULTIPLICATIVE:
728 printf("(%c ", pelem->u.parser_op.op);
729 dump_op2(pelem);
730 break;
731 case '!':
732 printf("(! ");
733 dump_elem(pelem->u.parser_op.operands[0]);
734 printf(")");
735 break;
736 case '?':
737 printf("(? ");
738 dump_op3(pelem);
739 break;
740 case T_CONSTANT:
741 printf("%d", pelem->u.token_data.constant);
742 break;
743 case T_IDENTIFIER:
744 #ifdef ALLOW_ARBITRARY_IDENTIFIER
745 printf("%s", pelem->u.token_data.identifier);
746 #else
747 printf(PLURAL_NUMBER_SYMBOL);
748 #endif
749 break;
752 #endif
#ifdef TEST_PARSER

/*
 * Parser test driver: parse argv[1] and print "none" for empty input,
 * the error code for a parse error, or the syntax tree.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	struct parser_element pelem;
	int token;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	init_parser_element(&pelem);
	token = parse_exp(&tcx, &pelem);

	if (token == T_EOF) {
		printf("none");
	} else if (T_IS_ERROR(token)) {
		printf("error: 0x%X", token);
	} else {
		dump_elem(&pelem);
	}
	printf("\n");

	uninit_parser_element(&pelem);

	return EXIT_SUCCESS;
}
#endif /* TEST_PARSER */
/* ----------------------------------------------------------------------
 * calculate plural number
 */
789 static unsigned long
790 calculate_plural(const struct parser_element *pe, unsigned long n)
792 unsigned long val0, val1;
793 switch (pe->kind) {
794 case T_IDENTIFIER:
795 return n;
796 case T_CONSTANT:
797 return pe->u.token_data.constant;
798 case '?':
799 val0 = calculate_plural(pe->u.parser_op.operands[0], n);
800 if (val0)
801 val1=calculate_plural(pe->u.parser_op.operands[1], n);
802 else
803 val1=calculate_plural(pe->u.parser_op.operands[2], n);
804 return val1;
805 case '!':
806 return !calculate_plural(pe->u.parser_op.operands[0], n);
807 case T_MULTIPLICATIVE:
808 case T_ADDITIVE:
809 case T_RELATIONAL:
810 case T_EQUALITY:
811 case T_LOR:
812 case T_LAND:
813 val0 = calculate_plural(pe->u.parser_op.operands[0], n);
814 val1 = calculate_plural(pe->u.parser_op.operands[1], n);
815 switch (pe->u.parser_op.op) {
816 case '*':
817 return val0*val1;
818 case '/':
819 return val0/val1;
820 case '%':
821 return val0%val1;
822 case '+':
823 return val0+val1;
824 case '-':
825 return val0-val1;
826 case '<':
827 return val0<val1;
828 case '>':
829 return val0>val1;
830 case OP_LTEQ:
831 return val0<=val1;
832 case OP_GTEQ:
833 return val0>=val1;
834 case OP_EQ:
835 return val0==val1;
836 case OP_NEQ:
837 return val0!=val1;
838 case '|':
839 return val0||val1;
840 case '&':
841 return val0&&val1;
844 return 0;
#ifdef TEST_CALC_PLURAL
#include <stdio.h>

/*
 * Evaluator test driver: parse argv[1] and print its value for the
 * number given in argv[2].
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	struct parser_element pelem;
	int token;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	init_parser_element(&pelem);
	token = parse_exp(&tcx, &pelem);

	if (token == T_EOF) {
		printf("none");
	} else if (T_IS_ERROR(token)) {
		printf("error: 0x%X", token);
	} else {
		unsigned long result;

		result = calculate_plural(&pelem, atoi(argv[2]));
		printf("plural = %lu", result);
	}
	printf("\n");

	uninit_parser_element(&pelem);

	return EXIT_SUCCESS;
}
#endif /* TEST_CALC_PLURAL */
/* ----------------------------------------------------------------------
 * parse plural forms
 */
/* Re-initialize the region from the result of _bcs_skip_ws_len(). */
static void
region_skip_ws(struct _region *r)
{
	size_t remaining = _region_size(r);
	const char *head = _region_head(r);

	head = _bcs_skip_ws_len(head, &remaining);
	_region_init(r, __UNCONST(head), remaining);
}
/*
 * Re-initialize the region with the length adjusted by
 * _bcs_trunc_rws_len(); the start of the region is unchanged.
 */
static void
region_trunc_rws(struct _region *r)
{
	size_t remaining = _region_size(r);
	const char *head = _region_head(r);

	_bcs_trunc_rws_len(head, &remaining);
	_region_init(r, __UNCONST(head), remaining);
}
/*
 * Check whether the region starts with the `prelen' bytes at `pre',
 * comparing case-insensitively when `ignorecase' is non-zero.
 * Returns 0 if it does, -1 if it does not or the region is too short.
 */
static int
region_check_prefix(struct _region *r, const char *pre, size_t prelen,
		    int ignorecase)
{
	int differs;

	if (_region_size(r) < prelen)
		return -1;

	if (ignorecase)
		differs = _bcs_strncasecmp(_region_head(r), pre, prelen);
	else
		differs = memcmp(_region_head(r), pre, prelen);

	return differs ? -1 : 0;
}
/*
 * Strip trailing white space and then the final ';' from the region.
 * Returns 0 on success, -1 if the region does not end in ';'.
 */
static int
cut_trailing_semicolon(struct _region *r)
{
	size_t len;

	region_trunc_rws(r);
	len = _region_size(r);
	if (len == 0)
		return -1;
	if (_region_peek8(r, len-1) != ';')
		return -1;
	_region_get_subregion(r, r, 0, len-1);
	return 0;
}
937 static int
938 find_plural_forms(struct _region *r)
940 struct _memstream ms;
941 struct _region rr;
943 _memstream_bind(&ms, r);
945 while (!_memstream_getln_region(&ms, &rr)) {
946 if (!region_check_prefix(&rr,
947 PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
948 _region_get_subregion(
949 r, &rr, LEN_PLURAL_FORMS,
950 _region_size(&rr)-LEN_PLURAL_FORMS);
951 region_skip_ws(r);
952 region_trunc_rws(r);
953 return 0;
956 return -1;
/*
 * Consume "<sym> =" from the start of the region, allowing white space
 * around both parts and skipping any that follows the '='.
 * Returns 0 on success, -1 if the symbol or the '=' is missing (the
 * region may have been partially advanced in that case).
 */
static int
skip_assignment(struct _region *r, const char *sym, size_t symlen)
{

	/* the symbol itself; the comparison is case-sensitive */
	region_skip_ws(r);
	if (region_check_prefix(r, sym, symlen, 0) != 0)
		return -1;
	_region_get_subregion(r, r, symlen, _region_size(r)-symlen);

	/* the '=' */
	region_skip_ws(r);
	if (_region_size(r) == 0)
		return -1;
	if (_region_peek8(r, 0) != '=')
		return -1;
	_region_get_subregion(r, r, 1, _region_size(r)-1);

	region_skip_ws(r);
	return 0;
}
974 static int
975 skip_nplurals(struct _region *r, unsigned long *rnp)
977 unsigned long np;
978 char buf[MAX_LEN_ATOM+2], *endptr;
979 const char *endptrconst;
980 size_t ofs;
982 if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
983 return -1;
984 if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
985 return -1;
986 strlcpy(buf, _region_head(r), sizeof (buf));
987 np = strtoul(buf, &endptr, 0);
988 endptrconst = _bcs_skip_ws(endptr);
989 if (*endptrconst != ';')
990 return -1;
991 ofs = endptrconst+1-buf;
992 if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
993 return -1;
994 if (rnp)
995 *rnp = np;
996 return 0;
999 static int
1000 parse_plural_body(struct _region *r, struct parser_element **rpe)
1002 int token;
1003 struct tokenizer_context tcx;
1004 struct parser_element pelem, *ppe;
1006 init_tokenizer_context(&tcx);
1007 _memstream_bind(&tcx.memstream, r);
1009 init_parser_element(&pelem);
1010 token = parse_exp(&tcx, &pelem);
1011 if (T_IS_ERROR(token))
1012 return token;
1014 ppe = dup_parser_element(&pelem);
1015 if (ppe == NULL) {
1016 uninit_parser_element(&pelem);
1017 return T_NOMEM;
1020 *rpe = ppe;
1022 return 0;
1025 static int
1026 parse_plural(struct parser_element **rpe, unsigned long *rnp,
1027 const char *str, size_t len)
1029 struct _region r;
1031 _region_init(&r, __UNCONST(str), len);
1033 if (find_plural_forms(&r))
1034 return T_NOTFOUND;
1035 if (skip_nplurals(&r, rnp))
1036 return T_ILPLURAL;
1037 if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1038 return T_ILPLURAL;
1039 if (cut_trailing_semicolon(&r))
1040 return T_ILPLURAL;
1041 return parse_plural_body(&r, rpe);
#ifdef TEST_PARSE_PLURAL

/*
 * Header test driver: parse the header text in argv[1], print the syntax
 * tree and nplurals, and, if argv[2] is given, the plural index for it.
 */
int
main(int argc, char **argv)
{
	int ret;
	struct parser_element *pelem;
	unsigned long np;

	if (argc != 2 && argc != 3) {
		fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
		return EXIT_FAILURE;
	}

	ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1]));

	if (ret == T_EOF) {
		printf("none");
	} else if (T_IS_ERROR(ret)) {
		printf("error: 0x%X", ret);
	} else {
		printf("syntax tree: ");
		dump_elem(pelem);
		printf("\nnplurals = %lu", np);
		if (argv[2]) {
			unsigned long idx;

			idx = calculate_plural(pelem, atoi(argv[2]));
			printf(", plural = %lu", idx);
		}
		free_parser_element(pelem);
	}
	printf("\n");


	return EXIT_SUCCESS;
}
#endif /* TEST_PARSE_PLURAL */
/*
 * external interface
 */
/*
 * Public wrapper around parse_plural(): struct gettext_plural is the
 * opaque name under which callers see the syntax tree.
 * Returns 0 on success or one of the T_* error codes.
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
		      const char *str, size_t len)
{
	struct parser_element **tree = (struct parser_element **)rpe;

	return parse_plural(tree, rnp, str, len);
}
/* Public wrapper around calculate_plural(): evaluate the tree for `n'. */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree = (void *)__UNCONST(pe);

	return calculate_plural(tree, n);
}
/* Public wrapper around free_parser_element(): release a whole tree. */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	struct parser_element *tree = (void *)pe;

	free_parser_element(tree);
}
#ifdef TEST_PLURAL
#include <libintl.h>
#include <locale.h>

/* print the message dngettext() selects in domain "test" for n */
#define PR(n)	printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))

/* End-to-end test driver: look up the plural message for n = 1..4. */
int
main(void)
{
	int n;

	bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */
	for (n = 1; n <= 4; n++)
		PR(n);

	return 0;
}
#endif