lib/libintl/plural_parser.c

   1 /*      $NetBSD: plural_parser.c,v 1.2 2007/01/17 23:24:22 hubertf Exp $        */
   2
   3 /*-
   4  * Copyright (c) 2005 Citrus Project,
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  *
  28  */
  29
  30 #include <sys/cdefs.h>
  31 __RCSID("$NetBSD: plural_parser.c,v 1.2 2007/01/17 23:24:22 hubertf Exp $");
  32
  33 #include <assert.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <citrus/citrus_namespace.h>
  38 #include <citrus/citrus_region.h>
  39 #include <citrus/citrus_memstream.h>
  40 #include <citrus/citrus_bcs.h>
  41 #include "plural_parser.h"
  42
  43 #if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
     /*
      * Test builds only.  ALLOW_EMPTY makes parse_exp() return T_EOF for an
      * empty expression; ALLOW_ARBITRARY_IDENTIFIER makes tokenize_atom()
      * accept any identifier instead of only PLURAL_NUMBER_SYMBOL.
      */
  44 #define ALLOW_EMPTY
  45 #define ALLOW_ARBITRARY_IDENTIFIER
  46 #endif
  47
     /* longest atom (identifier or constant) the tokenizer accepts */
  48 #define MAX_LEN_ATOM            10
     /* size of the operand array in struct parser_op ('?' uses all three) */
  49 #define MAX_NUM_OPERANDS        3
  50
     /*
      * Token codes.  Single-character tokens ('?', ':', '(', ')', '!') are
      * returned as the character itself.  Codes below 0x200 are treated as
      * operators, 0x200-0x2ff are atoms and 0x300 and above are errors.
      * Note that T_IS_OPERATOR() is also true for T_EOF and T_NONE.
      */
  51 #define T_EOF                   EOF
  52 #define T_NONE                  0x100
  53 #define T_LAND                  0x101   /* && */
  54 #define T_LOR                   0x102   /* || */
  55 #define T_EQUALITY              0x103   /* == or != */
  56 #define T_RELATIONAL            0x104   /* <, >, <= or >= */
  57 #define T_ADDITIVE              0x105   /* + or - */
  58 #define T_MULTIPLICATIVE        0x106   /* *, / or % */
  59 #define T_IDENTIFIER            0x200
  60 #define T_CONSTANT              0x201
  61 #define T_ILCHAR                0x300
  62 #define T_TOOLONG               0x301
  63 #define T_ILTOKEN               0x302
  64 #define T_ILEND                 0x303
  65 #define T_NOMEM                 0x304
  66 #define T_NOTFOUND              0x305
  67 #define T_ILPLURAL              0x306
  68 #define T_IS_OPERATOR(t)        ((t) < 0x200)
  69 #define T_IS_ERROR(t)           ((t) >= 0x300)
  70
     /*
      * Two-character operators are stored in a single char as the sum of
      * their two characters (see tokenize()).
      */
  71 #define OP_EQ                   ('='+'=')
  72 #define OP_NEQ                  ('!'+'=')
  73 #define OP_LTEQ                 ('<'+'=')
  74 #define OP_GTEQ                 ('>'+'=')
  75
     /* symbol names and header prefix; LEN_* exclude the terminating NUL */
  76 #define PLURAL_NUMBER_SYMBOL    "n"
  77 #define NPLURALS_SYMBOL         "nplurals"
  78 #define LEN_NPLURAL_SYMBOL      (sizeof (NPLURALS_SYMBOL) -1)
  79 #define PLURAL_SYMBOL           "plural"
  80 #define LEN_PLURAL_SYMBOL       (sizeof (PLURAL_SYMBOL) -1)
  81 #define PLURAL_FORMS            "Plural-Forms:"
  82 #define LEN_PLURAL_FORMS        (sizeof (PLURAL_FORMS) -1)
  83
  84 /* ----------------------------------------------------------------------
  85  * tokenizer part
  86  */
  87
  /*
   * Payload that accompanies a token: the value of a T_CONSTANT, the
   * spelling of a T_IDENTIFIER (test builds only) or the operator code.
   */
  88 union token_data
  89 {
  90         unsigned long constant;
  91 #ifdef ALLOW_ARBITRARY_IDENTIFIER
  92         char identifier[MAX_LEN_ATOM+1];
  93 #endif
  94         char op;        /* operator character, or an OP_* sum */
  95 };
  96
     /*
      * Tokenizer state: the input stream plus a one-token pushback slot.
      * token0.token is T_NONE while the slot is empty.
      */
  97 struct tokenizer_context
  98 {
  99         struct _memstream memstream;
 100         struct {
 101                 int token;
 102                 union token_data token_data;
 103         } token0;
 104 };
 105
 106 /* initialize a tokenizer context */
 107 static void
 108 init_tokenizer_context(struct tokenizer_context *tcx)
 109 {
 110         tcx->token0.token = T_NONE;
 111 }
 112
 113 /* get an atom (identifier or constant) */
 114 static int
 115 tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
 116 {
 117         int ch, len;
 118         char buf[MAX_LEN_ATOM+1];
 119
 120         len = 0;
 121         while (/*CONSTCOND*/1) {
 122                 ch = _memstream_getc(&tcx->memstream);
 123                 if (!(_bcs_isalnum(ch) || ch == '_')) {
 124                         _memstream_ungetc(&tcx->memstream, ch);
 125                         break;
 126                 }
 127                 if (len == MAX_LEN_ATOM)
 128                         return T_TOOLONG;
 129                 buf[len++] = ch;
 130         }
 131         buf[len] = '\0';
 132         if (len == 0)
 133                 return T_ILCHAR;
 134
 135         if (_bcs_isdigit((int)(unsigned char)buf[0])) {
 136                 unsigned long ul;
 137                 char *post;
 138                 ul = strtoul(buf, &post, 0);
 139                 if (buf+len != post)
 140                         return T_ILCHAR;
 141                 token_data->constant = ul;
 142                 return T_CONSTANT;
 143         }
 144
 145 #ifdef ALLOW_ARBITRARY_IDENTIFIER
 146         strcpy(token_data->identifier, buf);
 147         return T_IDENTIFIER;
 148 #else
 149         if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
 150                 return T_IDENTIFIER;
 151         return T_ILCHAR;
 152 #endif
 153 }
 154
 155 /* tokenizer main routine */
 156 static int
 157 tokenize(struct tokenizer_context *tcx, union token_data *token_data)
 158 {
 159         int ch, prevch;
 160
 161 retry:
 162         ch = _memstream_getc(&tcx->memstream);
 163         if (_bcs_isspace(ch))
 164                 goto retry;
 165
 166         switch (ch) {
 167         case T_EOF:
 168                 return ch;
 169         case '+': case '-':
 170                 token_data->op = ch;
 171                 return T_ADDITIVE;
 172         case '*': case '/': case '%':
 173                 token_data->op = ch;
 174                 return T_MULTIPLICATIVE;
 175         case '?': case ':': case '(': case ')':
 176                 token_data->op = ch;
 177                 return ch;
 178         case '&': case '|':
 179                 prevch = ch;
 180                 ch = _memstream_getc(&tcx->memstream);
 181                 if (ch != prevch) {
 182                         _memstream_ungetc(&tcx->memstream, ch);
 183                         return T_ILCHAR;
 184                 }
 185                 token_data->op = ch;
 186                 switch (ch) {
 187                 case '&':
 188                         return T_LAND;
 189                 case '|':
 190                         return T_LOR;
 191                 }
 192                 /*NOTREACHED*/
 193         case '=': case '!': case '<': case '>':
 194                 prevch = ch;
 195                 ch = _memstream_getc(&tcx->memstream);
 196                 if (ch != '=') {
 197                         _memstream_ungetc(&tcx->memstream, ch);
 198                         switch (prevch) {
 199                         case '=':
 200                                 return T_ILCHAR;
 201                         case '!':
 202                                 return '!';
 203                         case '<':
 204                         case '>':
 205                                 token_data->op = prevch; /* OP_LT or OP_GT */
 206                                 return T_RELATIONAL;
 207                         }
 208                 }
 209                 /* '==', '!=', '<=' or '>=' */
 210                 token_data->op = ch+prevch;
 211                 switch (prevch) {
 212                 case '=':
 213                 case '!':
 214                         return T_EQUALITY;
 215                 case '<':
 216                 case '>':
 217                         return T_RELATIONAL;
 218                 }
 219                 /*NOTREACHED*/
 220         }
 221
 222         _memstream_ungetc(&tcx->memstream, ch);
 223         return tokenize_atom(tcx, token_data);
 224 }
 225
 226 /* get the next token */
 227 static int
 228 get_token(struct tokenizer_context *tcx, union token_data *token_data)
 229 {
 230         if (tcx->token0.token != T_NONE) {
 231                 int token = tcx->token0.token;
 232                 tcx->token0.token = T_NONE;
 233                 *token_data = tcx->token0.token_data;
 234                 return token;
 235         }
 236         return tokenize(tcx, token_data);
 237 }
 238
 239 /* push back the last token */
 240 static void
 241 unget_token(struct tokenizer_context *tcx,
 242             int token, union token_data *token_data)
 243 {
 244         tcx->token0.token = token;
 245         tcx->token0.token_data = *token_data;
 246 }
 247
 248 #ifdef TEST_TOKENIZER
 249
 250 int
 251 main(int argc, char **argv)
 252 {
 253         struct tokenizer_context tcx;
 254         union token_data token_data;
 255         int token;
 256
 257         if (argc != 2) {
 258                 fprintf(stderr, "usage: %s <expression>\n", argv[0]);
 259                 return EXIT_FAILURE;
 260         }
 261
 262         init_tokenizer_context(&tcx);
 263         _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
 264
 265         while (1) {
 266                 token = get_token(&tcx, &token_data);
 267                 switch (token) {
 268                 case T_EOF:
 269                         goto quit;
 270                 case T_ILCHAR:
 271                         printf("illegal character.\n");
 272                         goto quit;
 273                 case T_TOOLONG:
 274                         printf("too long atom.\n");
 275                         goto quit;
 276                 case T_CONSTANT:
 277                         printf("constant: %lu\n", token_data.constant);
 278                         break;
 279                 case T_IDENTIFIER:
 280                         printf("symbol: %s\n", token_data.identifier);
 281                         break;
 282                 default:
 283                         printf("operator: ");
 284                         switch (token) {
 285                         case T_LAND:
 286                                 printf("&&\n");
 287                                 break;
 288                         case T_LOR:
 289                                 printf("||\n");
 290                                 break;
 291                         case T_EQUALITY:
 292                                 printf("%c=\n", token_data.op-'=');
 293                                 break;
 294                         case T_RELATIONAL:
 295                                 switch(token_data.op) {
 296                                 case OP_LTEQ:
 297                                 case OP_GTEQ:
 298                                         printf("%c=\n", token_data.op-'=');
 299                                         break;
 300                                 default:
 301                                         printf("%c\n", token_data.op);
 302                                         break;
 303                                 }
 304                                 break;
 305                         case T_ADDITIVE:
 306                         case T_MULTIPLICATIVE:
 307                                 printf("%c\n", token_data.op);
 308                                 break;
 309                         default:
 310                                 printf("operator: %c\n", token);
 311                         }
 312                 }
 313         }
 314 quit:
 315         return 0;
 316 }
 317 #endif /* TEST_TOKENIZER */
 318
 319
 320 /* ----------------------------------------------------------------------
 321  * parser part
 322  *
 323  * exp := cond
 324  *
 325  * cond := lor | lor '?' cond ':' cond
 326  *
 327  * lor := land ( '||' land )*
 328  *
 329  * land := equality ( '&&' equality )*
 330  *
 331  * equality := relational ( equalityops relational )*
 332  * equalityops := '==' | '!='
 333  *
 334  * relational := additive ( relationalops additive )*
 335  * relationalops := '<' | '>' | '<=' | '>='
 336  *
 337  * additive := multiplicative ( additiveops multiplicative )*
 338  * additiveops := '+' | '-'
 339  *
 340  * multiplicative := lnot ( multiplicativeops lnot )*
 341  * multiplicativeops := '*' | '/' | '%'
 342  *
 343  * lnot := '!' lnot | term
 344  *
 345  * term := literal | identifier | '(' exp ')'
 346  *
 347  */
 348
 /* Jump to `label' when `token' is an error code (T_IS_ERROR). */
 349 #define T_ENSURE_OK(token, label)                                             \
 350 do {                                                                          \
 351         if (T_IS_ERROR(token))                                                \
 352                 goto label;                                                   \
 353 } while (/*CONSTCOND*/0)
     /*
      * Like T_ENSURE_OK, but end of input is also an error: T_EOF is
      * rewritten to T_ILEND (so `token' must be an lvalue) before jumping.
      */
 354 #define T_ENSURE_SOMETHING(token, label)                                      \
 355 do {                                                                          \
 356         if ((token) == T_EOF) {                                               \
 357                 token = T_ILEND;                                              \
 358                 goto label;                                                   \
 359         } else if (T_IS_ERROR(token))                                         \
 360                 goto label;                                                   \
 361 } while (/*CONSTCOND*/0)
 362
 /* Inside this file the tree node type is spelled parser_element. */
 363 #define parser_element  plural_element
 364
 365 struct parser_element;
     /* An operator node: operator code plus up to MAX_NUM_OPERANDS children. */
 366 struct parser_op
 367 {
 368         char op;
 369         struct parser_element *operands[MAX_NUM_OPERANDS];
 370 };
     /*
      * A node of the expression tree.  `kind' is a token code; it selects
      * which union member is meaningful: parser_op for operator kinds,
      * token_data for T_IDENTIFIER and T_CONSTANT.
      */
 371 struct parser_element
 372 {
 373         int kind;
 374         union
 375         {
 376                 struct parser_op parser_op;
 377                 union token_data token_data;
 378         } u;
 379 };
 380
     /*
      * One precedence level of the 2-operand operators: the token kind
      * parsed at this level and the next (tighter-binding) level.  A NULL
      * `next' means the operands are parsed with parse_lnot().
      */
 381 struct parser_op2_transition
 382 {
 383         int                                     kind;
 384         const struct parser_op2_transition      *next;
 385 };
 386
 387 /* prototypes */
 388 static int parse_cond(struct tokenizer_context *, struct parser_element *);
 389
 390
 391 /* transition table for the 2-operand operators */
 392 #define DEF_TR(t, k, n)                                                       \
 393 static struct parser_op2_transition exp_tr_##t = {                            \
 394         k, &exp_tr_##n                                                        \
 395 }
 396 #define DEF_TR0(t, k)                                                         \
 397 static struct parser_op2_transition exp_tr_##t = {                            \
 398         k, NULL /* expect lnot */                                             \
 399 }
 400
 401 DEF_TR0(multiplicative, T_MULTIPLICATIVE);
 402 DEF_TR(additive, T_ADDITIVE, multiplicative);
 403 DEF_TR(relational, T_RELATIONAL, additive);
 404 DEF_TR(equality, T_EQUALITY, relational);
 405 DEF_TR(land, T_LAND, equality);
 406 DEF_TR(lor, T_LOR, land);
 407
 408 /* init a parser element structure */
 409 static void
 410 init_parser_element(struct parser_element *pe)
 411 {
 412         int i;
 413
 414         pe->kind = T_NONE;
 415         for (i=0; i<MAX_NUM_OPERANDS; i++)
 416                 pe->u.parser_op.operands[i] = NULL;
 417 }
 418
 419 /* uninitialize a parser element structure with freeing children */
 420 static void free_parser_element(struct parser_element *);
 421 static void
 422 uninit_parser_element(struct parser_element *pe)
 423 {
 424         int i;
 425
 426         if (T_IS_OPERATOR(pe->kind))
 427                 for (i=0; i<MAX_NUM_OPERANDS; i++)
 428                         if (pe->u.parser_op.operands[i])
 429                                 free_parser_element(
 430                                         pe->u.parser_op.operands[i]);
 431 }
 432
 /*
  * Free a heap-allocated parser element together with its whole subtree.
  * A NULL pointer is accepted and ignored.
  */
 static void
 free_parser_element(struct parser_element *pe)
 {
         if (pe == NULL)
                 return;

         uninit_parser_element(pe);
         free(pe);
 }
 442
 443
 444 /* copy a parser element structure shallowly */
 445 static void
 446 copy_parser_element(struct parser_element *dpe,
 447                     const struct parser_element *spe)
 448 {
 449         memcpy(dpe, spe, sizeof *dpe);
 450 }
 451
 452 /* duplicate a parser element structure shallowly */
 453 static struct parser_element *
 454 dup_parser_element(const struct parser_element *pe)
 455 {
 456         struct parser_element *dpe = malloc(sizeof *dpe);
 457         if (dpe)
 458                 copy_parser_element(dpe, pe);
 459         return dpe;
 460 }
 461
 462 /* term := identifier | constant | '(' exp ')' */
 463 static int
 464 parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
 465 {
 466         struct parser_element pe0;
 467         int token;
 468         union token_data token_data;
 469
 470         token = get_token(tcx, &token_data);
 471         switch (token) {
 472         case '(':
 473                 /* '(' exp ')' */
 474                 init_parser_element(&pe0);
 475                 /* expect exp */
 476                 token = parse_cond(tcx, &pe0);
 477                 T_ENSURE_OK(token, err);
 478                 /* expect ')' */
 479                 token = get_token(tcx, &token_data);
 480                 T_ENSURE_SOMETHING(token, err);
 481                 if (token != ')') {
 482                         unget_token(tcx, token, &token_data);
 483                         token = T_ILTOKEN;
 484                         goto err;
 485                 }
 486                 copy_parser_element(pelem, &pe0);
 487                 return token;
 488 err:
 489                 uninit_parser_element(&pe0);
 490                 return token;
 491         case T_IDENTIFIER:
 492         case T_CONSTANT:
 493                 pelem->kind = token;
 494                 pelem->u.token_data = token_data;
 495                 return token;
 496         case T_EOF:
 497                 return T_ILEND;
 498         default:
 499                 return T_ILTOKEN;
 500         }
 501 }
 502
 503 /* lnot := '!' lnot | term */
 504 static int
 505 parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
 506 {
 507         struct parser_element pe0;
 508         int token;
 509         union token_data token_data;
 510
 511         init_parser_element(&pe0);
 512
 513         /* '!' or not */
 514         token = get_token(tcx, &token_data);
 515         if (token != '!') {
 516                 /* stop: term */
 517                 unget_token(tcx, token, &token_data);
 518                 return parse_term(tcx, pelem);
 519         }
 520
 521         /* '!' term */
 522         token = parse_lnot(tcx, &pe0);
 523         T_ENSURE_OK(token, err);
 524
 525         pelem->kind = '!';
 526         pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
 527         return pelem->kind;
 528 err:
 529         uninit_parser_element(&pe0);
 530         return token;
 531 }
 532
 533 /* ext_op := ext_next ( op ext_next )* */
 534 static int
 535 parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
 536           const struct parser_op2_transition *tr)
 537 {
 538         struct parser_element pe0, pe1, peop;
 539         int token;
 540         union token_data token_data;
 541         char op;
 542
 543         /* special case: expect lnot */
 544         if (tr == NULL)
 545                 return parse_lnot(tcx, pelem);
 546
 547         init_parser_element(&pe0);
 548         init_parser_element(&pe1);
 549         token = parse_op2(tcx, &pe0, tr->next);
 550         T_ENSURE_OK(token, err);
 551
 552         while (/*CONSTCOND*/1) {
 553                 /* expect op or empty */
 554                 token = get_token(tcx, &token_data);
 555                 if (token != tr->kind) {
 556                         /* stop */
 557                         unget_token(tcx, token, &token_data);
 558                         copy_parser_element(pelem, &pe0);
 559                         break;
 560                 }
 561                 op = token_data.op;
 562                 /* right hand */
 563                 token = parse_op2(tcx, &pe1, tr->next);
 564                 T_ENSURE_OK(token, err);
 565
 566                 init_parser_element(&peop);
 567                 peop.kind = tr->kind;
 568                 peop.u.parser_op.op = op;
 569                 peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
 570                 init_parser_element(&pe0);
 571                 peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
 572                 init_parser_element(&pe1);
 573                 copy_parser_element(&pe0, &peop);
 574         }
 575         return pelem->kind;
 576 err:
 577         uninit_parser_element(&pe1);
 578         uninit_parser_element(&pe0);
 579         return token;
 580 }
 581
 582 /* cond := lor | lor '?' cond ':' cond */
 583 static int
 584 parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
 585 {
 586         struct parser_element pe0, pe1, pe2;
 587         int token;
 588         union token_data token_data;
 589
 590         init_parser_element(&pe0);
 591         init_parser_element(&pe1);
 592         init_parser_element(&pe2);
 593
 594         /* expect lor or empty */
 595         token = parse_op2(tcx, &pe0, &exp_tr_lor);
 596         T_ENSURE_OK(token, err);
 597
 598         /* '?' or not */
 599         token = get_token(tcx, &token_data);
 600         if (token != '?') {
 601                 /* stop: lor */
 602                 unget_token(tcx, token, &token_data);
 603                 copy_parser_element(pelem, &pe0);
 604                 return pe0.kind;
 605         }
 606
 607         /* lor '?' cond ':' cond */
 608         /* expect cond */
 609         token = parse_cond(tcx, &pe1);
 610         T_ENSURE_OK(token, err);
 611
 612         /* expect ':' */
 613         token = get_token(tcx, &token_data);
 614         T_ENSURE_OK(token, err);
 615         if (token != ':') {
 616                 unget_token(tcx, token, &token_data);
 617                 token = T_ILTOKEN;
 618                 goto err;
 619         }
 620
 621         /* expect cond */
 622         token = parse_cond(tcx, &pe2);
 623         T_ENSURE_OK(token, err);
 624
 625         pelem->kind = '?';
 626         pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
 627         pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
 628         pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
 629         return pelem->kind;
 630 err:
 631         uninit_parser_element(&pe2);
 632         uninit_parser_element(&pe1);
 633         uninit_parser_element(&pe0);
 634         return token;
 635 }
 636
 637 static int
 638 parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
 639 {
 640         int token, token1;
 641         union token_data token_data;
 642
 643 #ifdef ALLOW_EMPTY
 644         /* empty check */
 645         token = get_token(tcx, &token_data);
 646         if (token == T_EOF)
 647                 return token;
 648         unget_token(tcx, token, &token_data);
 649 #endif
 650
 651         token = parse_cond(tcx, pelem);
 652         if (!T_IS_ERROR(token)) {
 653                 /* termination check */
 654                 token1 = get_token(tcx, &token_data);
 655                 if (token1 == T_EOF)
 656                         return token;
 657                 else if (!T_IS_ERROR(token))
 658                          unget_token(tcx, token1, &token_data);
 659                 return T_ILTOKEN;
 660         }
 661         return token;
 662 }
 663
 664
 665 #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
 666 #include <stdio.h>
 667
 668 static void dump_elem(struct parser_element *);
 669
 670 static void
 671 dump_op2(struct parser_element *pelem)
 672 {
 673         dump_elem(pelem->u.parser_op.operands[0]);
 674         printf(" ");
 675         dump_elem(pelem->u.parser_op.operands[1]);
 676         printf(")");
 677 }
 678
 679 static void
 680 dump_op3(struct parser_element *pelem)
 681 {
 682         dump_elem(pelem->u.parser_op.operands[0]);
 683         printf(" ");
 684         dump_elem(pelem->u.parser_op.operands[1]);
 685         printf(" ");
 686         dump_elem(pelem->u.parser_op.operands[2]);
 687         printf(")");
 688 }
 689
 690 static void
 691 dump_elem(struct parser_element *pelem)
 692 {
 693         switch (pelem->kind) {
 694         case T_LAND:
 695                 printf("(&& ");
 696                 dump_op2(pelem);
 697                 break;
 698         case T_LOR:
 699                 printf("(|| ");
 700                 dump_op2(pelem);
 701                 break;
 702         case T_EQUALITY:
 703                 switch (pelem->u.parser_op.op) {
 704                 case OP_EQ:
 705                         printf("(== ");
 706                         break;
 707                 case OP_NEQ:
 708                         printf("(!= ");
 709                         break;
 710                 }
 711                 dump_op2(pelem);
 712                 break;
 713         case T_RELATIONAL:
 714                 switch (pelem->u.parser_op.op) {
 715                 case '<':
 716                 case '>':
 717                         printf("(%c ", pelem->u.parser_op.op);
 718                         break;
 719                 case OP_LTEQ:
 720                 case OP_GTEQ:
 721                         printf("(%c= ", pelem->u.parser_op.op-'=');
 722                         break;
 723                 }
 724                 dump_op2(pelem);
 725                 break;
 726         case T_ADDITIVE:
 727         case T_MULTIPLICATIVE:
 728                 printf("(%c ", pelem->u.parser_op.op);
 729                 dump_op2(pelem);
 730                 break;
 731         case '!':
 732                 printf("(! ");
 733                 dump_elem(pelem->u.parser_op.operands[0]);
 734                 printf(")");
 735                 break;
 736         case '?':
 737                 printf("(? ");
 738                 dump_op3(pelem);
 739                 break;
 740         case T_CONSTANT:
 741                 printf("%d", pelem->u.token_data.constant);
 742                 break;
 743         case T_IDENTIFIER:
 744 #ifdef ALLOW_ARBITRARY_IDENTIFIER
 745                 printf("%s", pelem->u.token_data.identifier);
 746 #else
 747                 printf(PLURAL_NUMBER_SYMBOL);
 748 #endif
 749                 break;
 750         }
 751 }
 752 #endif
 753 #ifdef TEST_PARSER
 754 int
 755 main(int argc, char **argv)
 756 {
 757         struct tokenizer_context tcx;
 758         struct parser_element pelem;
 759         int token;
 760
 761         if (argc != 2) {
 762                 fprintf(stderr, "usage: %s <expression>\n", argv[0]);
 763                 return EXIT_FAILURE;
 764         }
 765
 766         init_tokenizer_context(&tcx);
 767         _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
 768
 769         init_parser_element(&pelem);
 770         token = parse_exp(&tcx, &pelem);
 771
 772         if (token == T_EOF)
 773                 printf("none");
 774         else if (T_IS_ERROR(token))
 775                 printf("error: 0x%X", token);
 776         else
 777                 dump_elem(&pelem);
 778         printf("\n");
 779
 780         uninit_parser_element(&pelem);
 781
 782         return EXIT_SUCCESS;
 783 }
 784 #endif /* TEST_PARSER */
 785
 786 /* ----------------------------------------------------------------------
 787  * calcurate plural number
 788  */
 789 static unsigned long
 790 calculate_plural(const struct parser_element *pe, unsigned long n)
 791 {
 792         unsigned long val0, val1;
 793         switch (pe->kind) {
 794         case T_IDENTIFIER:
 795                 return n;
 796         case T_CONSTANT:
 797                 return pe->u.token_data.constant;
 798         case '?':
 799                 val0 = calculate_plural(pe->u.parser_op.operands[0], n);
 800                 if (val0)
 801                         val1=calculate_plural(pe->u.parser_op.operands[1], n);
 802                 else
 803                         val1=calculate_plural(pe->u.parser_op.operands[2], n);
 804                 return val1;
 805         case '!':
 806                 return !calculate_plural(pe->u.parser_op.operands[0], n);
 807         case T_MULTIPLICATIVE:
 808         case T_ADDITIVE:
 809         case T_RELATIONAL:
 810         case T_EQUALITY:
 811         case T_LOR:
 812         case T_LAND:
 813                 val0 = calculate_plural(pe->u.parser_op.operands[0], n);
 814                 val1 = calculate_plural(pe->u.parser_op.operands[1], n);
 815                 switch (pe->u.parser_op.op) {
 816                 case '*':
 817                         return val0*val1;
 818                 case '/':
 819                         return val0/val1;
 820                 case '%':
 821                         return val0%val1;
 822                 case '+':
 823                         return val0+val1;
 824                 case '-':
 825                         return val0-val1;
 826                 case '<':
 827                         return val0<val1;
 828                 case '>':
 829                         return val0>val1;
 830                 case OP_LTEQ:
 831                         return val0<=val1;
 832                 case OP_GTEQ:
 833                         return val0>=val1;
 834                 case OP_EQ:
 835                         return val0==val1;
 836                 case OP_NEQ:
 837                         return val0!=val1;
 838                 case '|':
 839                         return val0||val1;
 840                 case '&':
 841                         return val0&&val1;
 842                 }
 843         }
 844         return 0;
 845 }
 846
 847 #ifdef TEST_CALC_PLURAL
 848 #include <stdio.h>
 849
 850 int
 851 main(int argc, char **argv)
 852 {
 853         struct tokenizer_context tcx;
 854         struct parser_element pelem;
 855         int token;
 856
 857         if (argc != 3) {
 858                 fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
 859                 return EXIT_FAILURE;
 860         }
 861
 862         init_tokenizer_context(&tcx);
 863         _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
 864
 865         init_parser_element(&pelem);
 866         token = parse_exp(&tcx, &pelem);
 867
 868         if (token == T_EOF)
 869                 printf("none");
 870         else if (T_IS_ERROR(token))
 871                 printf("error: 0x%X", token);
 872         else {
 873                 printf("plural = %lu",
 874                        calculate_plural(&pelem, atoi(argv[2])));
 875         }
 876         printf("\n");
 877
 878         uninit_parser_element(&pelem);
 879
 880         return EXIT_SUCCESS;
 881 }
 882 #endif /* TEST_CALC_PLURAL */
 883
 884
 885 /* ----------------------------------------------------------------------
 886  * parse plural forms
 887  */
 888
 /*
  * Re-initialize the region with the pointer and length produced by
  * _bcs_skip_ws_len() for its current contents.
  */
 static void
 region_skip_ws(struct _region *r)
 {
         size_t remaining = _region_size(r);
         const char *start;

         start = _bcs_skip_ws_len(_region_head(r), &remaining);
         _region_init(r, __UNCONST(start), remaining);
 }
 898
 899 static void
 900 region_trunc_rws(struct _region *r)
 901 {
 902         const char *str = _region_head(r);
 903         size_t len = _region_size(r);
 904
 905         _bcs_trunc_rws_len(str, &len);
 906         _region_init(r, __UNCONST(str), len);
 907 }
 908
 909 static int
 910 region_check_prefix(struct _region *r, const char *pre, size_t prelen,
 911                     int ignorecase)
 912 {
 913         if (_region_size(r) < prelen)
 914                 return -1;
 915
 916         if (ignorecase) {
 917                 if (_bcs_strncasecmp(_region_head(r), pre, prelen))
 918                         return -1;
 919         } else {
 920                 if (memcmp(_region_head(r), pre, prelen))
 921                         return -1;
 922         }
 923         return 0;
 924 }
 925
 926 static int
 927 cut_trailing_semicolon(struct _region *r)
 928 {
 929
 930         region_trunc_rws(r);
 931         if (_region_size(r) == 0 || _region_peek8(r, _region_size(r)-1) != ';')
 932                 return -1;
 933         _region_get_subregion(r, r, 0, _region_size(r)-1);
 934         return 0;
 935 }
 936
 937 static int
 938 find_plural_forms(struct _region *r)
 939 {
 940         struct _memstream ms;
 941         struct _region rr;
 942
 943         _memstream_bind(&ms, r);
 944
 945         while (!_memstream_getln_region(&ms, &rr)) {
 946                 if (!region_check_prefix(&rr,
 947                                          PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
 948                         _region_get_subregion(
 949                                 r, &rr, LEN_PLURAL_FORMS,
 950                                 _region_size(&rr)-LEN_PLURAL_FORMS);
 951                         region_skip_ws(r);
 952                         region_trunc_rws(r);
 953                         return 0;
 954                 }
 955         }
 956         return -1;
 957 }
 958
 959 static int
 960 skip_assignment(struct _region *r, const char *sym, size_t symlen)
 961 {
 962         region_skip_ws(r);
 963         if (region_check_prefix(r, sym, symlen, 0))
 964                 return -1;
 965         _region_get_subregion(r, r, symlen, _region_size(r)-symlen);
 966         region_skip_ws(r);
 967         if (_region_size(r) == 0 || _region_peek8(r, 0) != '=')
 968                 return -1;
 969         _region_get_subregion(r, r, 1, _region_size(r)-1);
 970         region_skip_ws(r);
 971         return 0;
 972 }
 973
 974 static int
 975 skip_nplurals(struct _region *r, unsigned long *rnp)
 976 {
 977         unsigned long np;
 978         char buf[MAX_LEN_ATOM+2], *endptr;
 979         const char *endptrconst;
 980         size_t ofs;
 981
 982         if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
 983                 return -1;
 984         if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
 985                 return -1;
 986         strlcpy(buf, _region_head(r), sizeof (buf));
 987         np = strtoul(buf, &endptr, 0);
 988         endptrconst = _bcs_skip_ws(endptr);
 989         if (*endptrconst != ';')
 990                 return -1;
 991         ofs = endptrconst+1-buf;
 992         if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
 993                 return -1;
 994         if (rnp)
 995                 *rnp = np;
 996         return 0;
 997 }
 998
 999 static int
1000 parse_plural_body(struct _region *r, struct parser_element **rpe)
1001 {
1002         int token;
1003         struct tokenizer_context tcx;
1004         struct parser_element pelem, *ppe;
1005
1006         init_tokenizer_context(&tcx);
1007         _memstream_bind(&tcx.memstream, r);
1008
1009         init_parser_element(&pelem);
1010         token = parse_exp(&tcx, &pelem);
1011         if (T_IS_ERROR(token))
1012                 return token;
1013
1014         ppe = dup_parser_element(&pelem);
1015         if (ppe == NULL) {
1016                 uninit_parser_element(&pelem);
1017                 return T_NOMEM;
1018         }
1019
1020         *rpe = ppe;
1021
1022         return 0;
1023 }
1024
1025 static int
1026 parse_plural(struct parser_element **rpe, unsigned long *rnp,
1027              const char *str, size_t len)
1028 {
1029         struct _region r;
1030
1031         _region_init(&r, __UNCONST(str), len);
1032
1033         if (find_plural_forms(&r))
1034                 return T_NOTFOUND;
1035         if (skip_nplurals(&r, rnp))
1036                 return T_ILPLURAL;
1037         if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1038                 return T_ILPLURAL;
1039         if (cut_trailing_semicolon(&r))
1040                 return T_ILPLURAL;
1041         return parse_plural_body(&r, rpe);
1042 }
1043
#ifdef TEST_PARSE_PLURAL
/*
 * Stand-alone driver for parse_plural().
 *
 * usage: <prog> <mime-header> [n]
 *
 * Parses argv[1]; on success dumps the syntax tree and nplurals and, when
 * the optional n is given, also the value calculate_plural() yields for
 * atoi(n).
 */
int
main(int argc, char **argv)
{
        struct parser_element *tree;
        unsigned long nplurals;
        int status;

        if (argc != 2 && argc != 3) {
                fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
                return EXIT_FAILURE;
        }

        status = parse_plural(&tree, &nplurals, argv[1], strlen(argv[1]));

        if (status == T_EOF) {
                printf("none");
        } else if (T_IS_ERROR(status)) {
                printf("error: 0x%X", status);
        } else {
                printf("syntax tree: ");
                dump_elem(tree);
                printf("\nnplurals = %lu", nplurals);
                /* argv[2] is NULL when only the header was given */
                if (argv[2] != NULL) {
                        printf(", plural = %lu",
                               calculate_plural(tree, atoi(argv[2])));
                }
                free_parser_element(tree);
        }
        printf("\n");

        return EXIT_SUCCESS;
}
#endif /* TEST_PARSE_PLURAL */
1078
1079 /*
1080  * external interface
1081  */
1082
/*
 * Public entry point: parse the plural specification in the len bytes at
 * str.  Forwards to parse_plural(), treating struct gettext_plural as the
 * parser's struct parser_element; the return value is passed through
 * unchanged.
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
                      const char *str, size_t len)
{
        struct parser_element **out;

        out = (struct parser_element **)rpe;
        return parse_plural(out, rnp, str, len);
}
1089
/*
 * Public entry point: evaluate the parsed plural expression pe for n.
 * Forwards to calculate_plural() after dropping the const qualifier.
 */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
        void *elem;

        elem = (void *)__UNCONST(pe);
        return calculate_plural(elem, n);
}
1095
/*
 * Public entry point: release a plural expression by handing it to
 * free_parser_element().
 */
void
_gettext_free_plural(struct gettext_plural *pe)
{
        void *elem;

        elem = (void *)pe;
        free_parser_element(elem);
}
1101
#ifdef TEST_PLURAL
#include <libintl.h>
#include <locale.h>

/* Print the string dngettext() picks in domain "test" for count n. */
#define PR(n)   printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))

/*
 * Stand-alone driver: binds domain "test" to the current directory and
 * prints the dngettext() result for the counts 1 through 4.
 */
int
main(void)
{
        int count;

        bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */

        for (count = 1; count <= 4; count++)
                PR(count);

        return 0;
}
#endif