uspace/app/sbi/src/lex.c

   1 /*
   2  * Copyright (c) 2011 Jiri Svoboda
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * - Redistributions of source code must retain the above copyright
  10  *   notice, this list of conditions and the following disclaimer.
  11  * - Redistributions in binary form must reproduce the above copyright
  12  *   notice, this list of conditions and the following disclaimer in the
  13  *   documentation and/or other materials provided with the distribution.
  14  * - The name of the author may not be used to endorse or promote products
  15  *   derived from this software without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28
  29 /** @file Lexer (lexical analyzer).
  30  *
  31  * Consumes a text file and produces a sequence of lexical elements (lems).
  32  */
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include "bigint.h"
  37 #include "cspan.h"
  38 #include "mytypes.h"
  39 #include "input.h"
  40 #include "os/os.h"
  41 #include "strtab.h"
  42
  43 #include "lex.h"
  44
/** Number of columns a tab is taken to span when computing column numbers. */
#define TAB_WIDTH 8

/** Selects which kind of quoted literal lex_char_string_core() scans. */
typedef enum {
        /** Character literal (terminated by a single quote) */
        cs_chr,
        /** String literal (terminated by a double quote) */
        cs_str
} chr_str_t;
  51
/* Lazy reading of the current lem */
static void lex_touch(lex_t *lex);
static bool_t lex_read_try(lex_t *lex);

/* Helpers that skip input, classify characters and scan individual lems */
static void lex_skip_comment(lex_t *lex);
static void lex_skip_ws(lex_t *lex);
static bool_t is_wstart(char c);
static bool_t is_wcont(char c);
static bool_t is_digit(char c);
static void lex_word(lex_t *lex);
static void lex_char(lex_t *lex);
static void lex_number(lex_t *lex);
static void lex_string(lex_t *lex);
static void lex_char_string_core(lex_t *lex, chr_str_t cs);
static int digit_value(char c);
  66
/*
 * File-scope scratch buffer in which lex_word() assembles the current word
 * (one extra byte is reserved for the null terminator).
 *
 * Note: This imposes an implementation limit on identifier length.
 */
#define IBUF_SIZE 128
static char ident_buf[IBUF_SIZE + 1];

/*
 * File-scope scratch buffer in which lex_char_string_core() stores the
 * decoded contents of a character or string literal (one extra byte is
 * reserved for the null terminator).
 *
 * XXX This imposes an implementation limit on string literal length.
 */
#define SLBUF_SIZE 128
static char strlit_buf[SLBUF_SIZE + 1];
  74
/** Lclass-string pair
 *
 * One entry of a table mapping a lexical element class to its textual form.
 */
struct lc_name {
        /** Lexical element class */
        lclass_t lclass;
        /** Textual form; @c NULL marks the terminating entry of a table */
        const char *name;
};
  80
/** Keyword names. Used both for printing and recognition.
 *
 * lex_word() compares each scanned word against these names and
 * lclass_print() looks up the name by lclass. Both scan the table linearly
 * and stop at the terminating entry whose name is @c NULL.
 */
static struct lc_name keywords[] = {
        { lc_and,       "and" },
        { lc_as,        "as" },
        { lc_bool,      "bool" },
        { lc_break,     "break" },
        { lc_builtin,   "builtin" },
        { lc_char,      "char" },
        { lc_class,     "class" },
        { lc_deleg,     "deleg" },
        { lc_do,        "do" },
        { lc_elif,      "elif" },
        { lc_else,      "else" },
        { lc_end,       "end" },
        { lc_enum,      "enum" },
        { lc_except,    "except" },
        { lc_false,     "false" },
        { lc_finally,   "finally" },
        { lc_for,       "for" },
        { lc_fun,       "fun" },
        { lc_get,       "get" },
        { lc_if,        "if" },
        { lc_in,        "in" },
        { lc_int,       "int" },
        { lc_interface, "interface" },
        { lc_is,        "is" },
        { lc_new,       "new" },
        { lc_not,       "not" },
        { lc_nil,       "nil" },
        { lc_or,        "or" },
        { lc_override,  "override" },
        { lc_packed,    "packed" },
        { lc_private,   "private" },
        { lc_prop,      "prop" },
        { lc_protected, "protected" },
        { lc_public,    "public" },
        { lc_raise,     "raise" },
        { lc_resource,  "resource" },
        { lc_return,    "return" },
        { lc_self,      "self" },
        { lc_set,       "set" },
        { lc_static,    "static" },
        { lc_string,    "string" },
        { lc_struct,    "struct" },
        { lc_switch,    "switch" },
        { lc_then,      "then" },
        { lc_this,      "this" },
        { lc_true,      "true" },
        { lc_var,       "var" },
        { lc_with,      "with" },
        { lc_when,      "when" },
        { lc_while,     "while" },
        { lc_yield,     "yield" },

        /* Terminating entry */
        { 0,            NULL }
};
 137
/** Other simple lclasses. Only used for printing.
 *
 * Scanned linearly by lclass_print() after the keyword table. The scan
 * stops at the terminating entry whose name is @c NULL.
 */
static struct lc_name simple_lc[] = {
        { lc_invalid,   "INVALID" },
        { lc_eof,       "EOF" },

        /* Operators */
        { lc_period,    "." },
        { lc_slash,     "/" },
        { lc_lparen,    "(" },
        { lc_rparen,    ")" },
        { lc_lsbr,      "[" },
        { lc_rsbr,      "]" },
        { lc_equal,     "==" },
        { lc_notequal,  "!=" },
        { lc_lt,        "<" },
        { lc_gt,        ">" },
        { lc_lt_equal,  "<=" },
        { lc_gt_equal,  ">=" },
        { lc_assign,    "=" },
        { lc_plus,      "+" },
        { lc_minus,     "-" },
        { lc_mult,      "*" },
        { lc_increase,  "+=" },

        /* Punctuators */
        { lc_comma,     "," },
        { lc_colon,     ":" },
        { lc_scolon,    ";" },

        /* Terminating entry */
        { 0,            NULL },
};
 169
 170 /** Print lclass value.
 171  *
 172  * Prints lclass (lexical element class) value in human-readable form
 173  * (for debugging).
 174  *
 175  * @param lclass        Lclass value for display.
 176  */
 177 void lclass_print(lclass_t lclass)
 178 {
 179         struct lc_name *dp;
 180
 181         dp = keywords;
 182         while (dp->name != NULL) {
 183                 if (dp->lclass == lclass) {
 184                         printf("%s", dp->name);
 185                         return;
 186                 }
 187                 ++dp;
 188         }
 189
 190         dp = simple_lc;
 191         while (dp->name != NULL) {
 192                 if (dp->lclass == lclass) {
 193                         printf("%s", dp->name);
 194                         return;
 195                 }
 196                 ++dp;
 197         }
 198
 199         switch (lclass) {
 200         case lc_ident:
 201                 printf("ident");
 202                 break;
 203         case lc_lit_int:
 204                 printf("int_literal");
 205                 break;
 206         case lc_lit_string:
 207                 printf("string_literal");
 208                 break;
 209         default:
 210                 printf("<unknown?>");
 211                 break;
 212         }
 213 }
 214
 215 /** Print lexical element.
 216  *
 217  * Prints lexical element in human-readable form (for debugging).
 218  *
 219  * @param lem           Lexical element for display.
 220  */
 221 void lem_print(lem_t *lem)
 222 {
 223         lclass_print(lem->lclass);
 224
 225         switch (lem->lclass) {
 226         case lc_ident:
 227                 printf("('%s')", strtab_get_str(lem->u.ident.sid));
 228                 break;
 229         case lc_lit_int:
 230                 printf("(");
 231                 bigint_print(&lem->u.lit_int.value);
 232                 printf(")");
 233                 break;
 234         case lc_lit_string:
 235                 printf("(\"%s\")", lem->u.lit_string.value);
 236         default:
 237                 break;
 238         }
 239 }
 240
/** Print lem coordinates.
 *
 * Print the coordinates (line number, column number) of a lexical element.
 * The actual printing is delegated to cspan_print() on the coordinate span
 * recorded in the lem.
 *
 * @param lem           Lexical element for coordinate printing.
 */
void lem_print_coords(lem_t *lem)
{
        cspan_print(lem->cspan);
}
 251
 252 /** Initialize lexer instance.
 253  *
 254  * @param lex           Lexer object to initialize.
 255  * @param input         Input to associate with lexer.
 256  */
 257 void lex_init(lex_t *lex, struct input *input)
 258 {
 259         errno_t rc;
 260
 261         lex->input = input;
 262
 263         rc = input_get_line(lex->input, &lex->inbuf);
 264         if (rc != EOK) {
 265                 printf("Error reading input.\n");
 266                 exit(1);
 267         }
 268
 269         lex->ibp = lex->inbuf;
 270         lex->col_adj = 0;
 271         lex->prev_valid = b_false;
 272         lex->current_valid = b_true;
 273 }
 274
/** Advance to next lexical element.
 *
 * The new element is read in lazily when it is actually accessed. This
 * function only makes sure the current lem has been read in and then marks
 * it as no longer valid.
 *
 * @param lex           Lexer object.
 */
void lex_next(lex_t *lex)
{
        /* Make sure the current lem has already been read in. */
        lex_touch(lex);

        /* Force a new lem to be read on next access. */
        lex->current_valid = b_false;
}
 289
/** Get current lem.
 *
 * Reads in the current lem first if that has not been done yet.
 *
 * The returned pointer is invalidated by next call to lex_next()
 *
 * @param lex           Lexer object.
 * @return              Pointer to current lem. Owned by @a lex and only valid
 *                      until next call to lex_xxx().
 */
lem_t *lex_get_current(lex_t *lex)
{
        /* Lazily read in the lem if lex_next() has invalidated it. */
        lex_touch(lex);
        return &lex->current;
}
 303
 304 /** Get previous lem if valid.
 305  *
 306  * The returned pointer is invalidated by next call to lex_next()
 307  *
 308  * @param lex           Lexer object.
 309  * @return              Pointer to previous lem. Owned by @a lex and only valid
 310  *                      until next call to lex_xxx().
 311  */
 312 lem_t *lex_peek_prev(lex_t *lex)
 313 {
 314         if (lex->current_valid == b_false) {
 315                 /*
 316                  * This means the head is advanced but next lem was not read.
 317                  * Thus the previous lem is still in @a current.
 318                  */
 319                 return &lex->current;
 320         }
 321
 322         if (lex->prev_valid != b_true) {
 323                 /* Looks like we are still at the first lem. */
 324                 return NULL;
 325         }
 326
 327         /*
 328          * Current lem has been read in. Thus the previous lem was moved to
 329          * @a previous.
 330          */
 331         return &lex->prev;
 332 }
 333
 334 /** Read in the current lexical element (unless already read in).
 335  *
 336  * @param lex           Lexer object.
 337  */
 338 static void lex_touch(lex_t *lex)
 339 {
 340         bool_t got_lem;
 341
 342         if (lex->current_valid == b_true)
 343                 return;
 344
 345         /* Copy previous lem */
 346         lex->prev = lex->current;
 347         lex->prev_valid = b_true;
 348
 349         do {
 350                 got_lem = lex_read_try(lex);
 351         } while (got_lem == b_false);
 352
 353         lex->current_valid = b_true;
 354 }
 355
/** Try reading next lexical element.
 *
 * Attempts to read the next lexical element. In some cases (such as a comment)
 * this function will need to give it another try and returns @c b_false
 * in such case.
 *
 * Leading whitespace is skipped first. Words, character literals, numbers
 * and string literals are handed over to the respective helper function.
 * Operators and punctuators are recognized directly in this function. Any
 * other character produces a lem of class @c lc_invalid.
 *
 * @param lex           Lexer object.
 * @return              @c b_true on success or @c b_false if it needs
 *                      restarting. On success the lem is stored to
 *                      the current lem in @a lex.
 */
static bool_t lex_read_try(lex_t *lex)
{
        char *bp, *lsp;
        int line0, col0;

        lex_skip_ws(lex);

        /*
         * Record lem coordinates. Line number we already have. For column
         * number we start with position in the input buffer. This works
         * for all characters except tab. Thus we keep track of tabs
         * separately using col_adj.
         */
        line0 = input_get_line_no(lex->input);
        col0 = 1 + lex->col_adj + (lex->ibp - lex->inbuf);

        /* The ending column is filled in once the lem has been scanned. */
        lex->current.cspan = cspan_new(lex->input, line0, col0, line0, col0);

        /* lsp remembers where the lem starts, bp is the scanning position. */
        lsp = lex->ibp;
        bp = lex->ibp;

        if (bp[0] == '\0') {
                /* End of input */
                lex->current.lclass = lc_eof;
                goto finish;
        }

        /* The helpers below advance lex->ibp past the lem themselves. */
        if (is_wstart(bp[0])) {
                lex_word(lex);
                goto finish;
        }

        if (bp[0] == '\'') {
                lex_char(lex);
                goto finish;
        }

        if (is_digit(bp[0])) {
                lex_number(lex);
                goto finish;
        }

        if (bp[0] == '"') {
                lex_string(lex);
                goto finish;
        }

        /* This must be tested before '-' is treated as the minus operator. */
        if (bp[0] == '-' && bp[1] == '-') {
                lex_skip_comment(lex);

                /* Compute ending column number */
                lex->current.cspan->col1 = col0 + (lex->ibp - lsp) - 1;

                /*
                 * NOTE(review): the retry creates a fresh cspan that replaces
                 * the one created above. Whether the old one needs to be
                 * released depends on cspan ownership rules -- confirm.
                 */

                /* Try again */
                return b_false;
        }

        /*
         * Operators and punctuators. Each case sets the lclass and advances
         * bp past the characters it consumed.
         */
        switch (bp[0]) {
        case ',':
                lex->current.lclass = lc_comma;
                ++bp;
                break;
        case ':':
                lex->current.lclass = lc_colon;
                ++bp;
                break;
        case ';':
                lex->current.lclass = lc_scolon;
                ++bp;
                break;

        case '.':
                lex->current.lclass = lc_period;
                ++bp;
                break;
        case '/':
                lex->current.lclass = lc_slash;
                ++bp;
                break;
        case '(':
                lex->current.lclass = lc_lparen;
                ++bp;
                break;
        case ')':
                lex->current.lclass = lc_rparen;
                ++bp;
                break;
        case '[':
                lex->current.lclass = lc_lsbr;
                ++bp;
                break;
        case ']':
                lex->current.lclass = lc_rsbr;
                ++bp;
                break;

        case '=':
                /* Either '==' or '=' */
                if (bp[1] == '=') {
                        lex->current.lclass = lc_equal;
                        bp += 2;
                        break;
                }
                lex->current.lclass = lc_assign;
                ++bp;
                break;

        case '!':
                /* Only valid as part of '!=' */
                if (bp[1] == '=') {
                        lex->current.lclass = lc_notequal;
                        bp += 2;
                        break;
                }
                goto invalid;

        case '+':
                /* Either '+=' or '+' */
                if (bp[1] == '=') {
                        lex->current.lclass = lc_increase;
                        bp += 2;
                        break;
                }
                lex->current.lclass = lc_plus;
                ++bp;
                break;

        case '-':
                lex->current.lclass = lc_minus;
                ++bp;
                break;

        case '*':
                lex->current.lclass = lc_mult;
                ++bp;
                break;

        case '<':
                /* Either '<=' or '<' */
                if (bp[1] == '=') {
                        lex->current.lclass = lc_lt_equal;
                        bp += 2;
                        break;
                }
                lex->current.lclass = lc_lt;
                ++bp;
                break;

        case '>':
                /* Either '>=' or '>' */
                if (bp[1] == '=') {
                        lex->current.lclass = lc_gt_equal;
                        bp += 2;
                        break;
                }
                lex->current.lclass = lc_gt;
                ++bp;
                break;

        default:
                goto invalid;
        }

        /* Commit the new position in the input buffer. */
        lex->ibp = bp;

finish:
        /* Compute ending column number */
        lex->current.cspan->col1 = col0 + (lex->ibp - lsp) - 1;
        return b_true;

invalid:
        /*
         * Unrecognized character. Consume just this one character. The
         * ending column is not recomputed here; it keeps the value passed
         * to cspan_new() above (presumably col0 -- confirm).
         */
        lex->current.lclass = lc_invalid;
        ++bp;
        lex->ibp = bp;

        return b_true;
}
 539
 540 /** Lex a word (identifier or keyword).
 541  *
 542  * Read in a word. This may later turn out to be a keyword or a regular
 543  * identifier. It is stored in the current lem in @a lex.
 544  *
 545  * @param lex           Lexer object.
 546  */
 547 static void lex_word(lex_t *lex)
 548 {
 549         struct lc_name *dp;
 550         char *bp;
 551         int idx;
 552
 553         bp = lex->ibp;
 554         ident_buf[0] = bp[0];
 555         idx = 1;
 556
 557         while (is_wcont(bp[idx])) {
 558                 if (idx >= IBUF_SIZE) {
 559                         printf("Error: Identifier too long.\n");
 560                         exit(1);
 561                 }
 562
 563                 ident_buf[idx] = bp[idx];
 564                 ++idx;
 565         }
 566
 567         lex->ibp = bp + idx;
 568
 569         ident_buf[idx] = '\0';
 570
 571         dp = keywords;
 572         while (dp->name != NULL) {
 573                 if (os_str_cmp(ident_buf, dp->name) == 0) {
 574                         /* Match */
 575                         lex->current.lclass = dp->lclass;
 576                         return;
 577                 }
 578                 ++dp;
 579         }
 580
 581         /* No matching keyword -- it must be an identifier. */
 582         lex->current.lclass = lc_ident;
 583         lex->current.u.ident.sid = strtab_get_sid(ident_buf);
 584 }
 585
 586 /** Lex a character literal.
 587  *
 588  * Reads in a character literal and stores it in the current lem in @a lex.
 589  *
 590  * @param lex           Lexer object.
 591  */
 592 static void lex_char(lex_t *lex)
 593 {
 594         size_t len;
 595         int char_val;
 596
 597         lex_char_string_core(lex, cs_chr);
 598
 599         len = os_str_length(strlit_buf);
 600         if (len != 1) {
 601                 printf("Character literal should contain one character, "
 602                     "but contains %u characters instead.\n", (unsigned) len);
 603                 exit(1);
 604         }
 605
 606         os_str_get_char(strlit_buf, 0, &char_val);
 607         lex->current.lclass = lc_lit_char;
 608         bigint_init(&lex->current.u.lit_char.value, char_val);
 609 }
 610
 611 /** Lex a numeric literal.
 612  *
 613  * Reads in a numeric literal and stores it in the current lem in @a lex.
 614  *
 615  * @param lex           Lexer object.
 616  */
 617 static void lex_number(lex_t *lex)
 618 {
 619         char *bp;
 620         bigint_t value;
 621         bigint_t dgval;
 622         bigint_t base;
 623         bigint_t tprod;
 624
 625         bp = lex->ibp;
 626
 627         bigint_init(&value, 0);
 628         bigint_init(&base, 10);
 629
 630         while (is_digit(*bp)) {
 631                 bigint_mul(&value, &base, &tprod);
 632                 bigint_init(&dgval, digit_value(*bp));
 633
 634                 bigint_destroy(&value);
 635                 bigint_add(&tprod, &dgval, &value);
 636                 bigint_destroy(&tprod);
 637                 bigint_destroy(&dgval);
 638
 639                 ++bp;
 640         }
 641
 642         bigint_destroy(&base);
 643
 644         lex->ibp = bp;
 645
 646         lex->current.lclass = lc_lit_int;
 647         bigint_shallow_copy(&value, &lex->current.u.lit_int.value);
 648 }
 649
/** Lex a string literal.
 *
 * Reads in a string literal and stores it in the current lem in @a lex.
 * The literal is scanned by lex_char_string_core() which leaves its decoded
 * contents in @c strlit_buf. The lem receives a duplicate of the buffer
 * made with os_str_dup() (presumably a newly allocated copy -- confirm
 * against os_str_dup()).
 *
 * @param lex           Lexer object.
 */
static void lex_string(lex_t *lex)
{
        lex_char_string_core(lex, cs_str);

        lex->current.lclass = lc_lit_string;
        /* strlit_buf is reused for the next literal, hence the duplicate. */
        lex->current.u.lit_string.value = os_str_dup(strlit_buf);
}
 663
 664 static void lex_char_string_core(lex_t *lex, chr_str_t cs)
 665 {
 666         char *bp;
 667         int sidx, didx;
 668         char term;
 669         const char *descr, *cap_descr;
 670         char spchar;
 671
 672         /* Make compiler happy */
 673         term = '\0';
 674         descr = NULL;
 675         cap_descr = NULL;
 676
 677         switch (cs) {
 678         case cs_chr:
 679                 term = '\'';
 680                 descr = "character";
 681                 cap_descr = "Character";
 682                 break;
 683         case cs_str:
 684                 term = '"';
 685                 descr = "string";
 686                 cap_descr = "String";
 687                 break;
 688         }
 689
 690         bp = lex->ibp + 1;
 691         sidx = didx = 0;
 692
 693         while (bp[sidx] != term) {
 694                 if (didx >= SLBUF_SIZE) {
 695                         printf("Error: %s literal too long.\n", cap_descr);
 696                         exit(1);
 697                 }
 698
 699                 if (bp[sidx] == '\0') {
 700                         printf("Error: Unterminated %s literal.\n", descr);
 701                         exit(1);
 702                 }
 703
 704                 if (bp[sidx] == '\\') {
 705                         switch (bp[sidx + 1]) {
 706                         case '\\':
 707                                 spchar = '\\';
 708                                 break;
 709                         case '\'':
 710                                 spchar = '\'';
 711                                 break;
 712                         case '"':
 713                                 spchar = '"';
 714                                 break;
 715                         case 'n':
 716                                 spchar = '\n';
 717                                 break;
 718                         case 't':
 719                                 spchar = '\t';
 720                                 break;
 721                         default:
 722                                 printf("Error: Unknown character escape sequence.\n");
 723                                 exit(1);
 724                         }
 725
 726                         strlit_buf[didx] = spchar;
 727                         ++didx;
 728                         sidx += 2;
 729                 } else {
 730                         strlit_buf[didx] = bp[sidx];
 731                         ++sidx;
 732                         ++didx;
 733                 }
 734         }
 735
 736         lex->ibp = bp + sidx + 1;
 737
 738         strlit_buf[didx] = '\0';
 739 }
 740
 741 /** Lex a single-line comment.
 742  *
 743  * This does not produce any lem. The comment is just skipped.
 744  *
 745  * @param lex           Lexer object.
 746  */
 747 static void lex_skip_comment(lex_t *lex)
 748 {
 749         char *bp;
 750
 751         bp = lex->ibp + 2;
 752
 753         while (*bp != '\n' && *bp != '\0') {
 754                 ++bp;
 755         }
 756
 757         lex->ibp = bp;
 758 }
 759
 760 /** Skip whitespace characters.
 761  *
 762  * This does not produce any lem. The whitespace is just skipped.
 763  *
 764  * @param lex           Lexer object.
 765  */
 766 static void lex_skip_ws(lex_t *lex)
 767 {
 768         char *bp;
 769         errno_t rc;
 770
 771         bp = lex->ibp;
 772
 773         while (b_true) {
 774                 while (*bp == ' ' || *bp == '\t') {
 775                         if (*bp == '\t') {
 776                                 /* XXX This is too simplifed. */
 777                                 lex->col_adj += (TAB_WIDTH - 1);
 778                         }
 779                         ++bp;
 780                 }
 781
 782                 if (*bp != '\n')
 783                         break;
 784
 785                 /* Read next line */
 786                 rc = input_get_line(lex->input, &lex->inbuf);
 787                 if (rc != EOK) {
 788                         printf("Error reading input.\n");
 789                         exit(1);
 790                 }
 791
 792                 bp = lex->inbuf;
 793                 lex->col_adj = 0;
 794         }
 795
 796         lex->ibp = bp;
 797 }
 798
 799 /** Determine if character can start a word.
 800  *
 801  * @param c     Character.
 802  * @return      @c b_true if @a c can start a word, @c b_false otherwise.
 803  */
 804 static bool_t is_wstart(char c)
 805 {
 806         return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) ||
 807             (c == '_');
 808 }
 809
 810 /** Determine if character can continue a word.
 811  *
 812  * @param c     Character.
 813  * @return      @c b_true if @a c can start continue word, @c b_false
 814  *              otherwise.
 815  */
 816 static bool_t is_wcont(char c)
 817 {
 818         return is_digit(c) || is_wstart(c);
 819 }
 820
 821 /** Determine if character is a numeric digit.
 822  *
 823  * @param c     Character.
 824  * @return      @c b_true if @a c is a numeric digit, @c b_false otherwise.
 825  */
 826 static bool_t is_digit(char c)
 827 {
 828         return ((c >= '0') && (c <= '9'));
 829 }
 830
 831 /** Determine numeric value of digit character.
 832  *
 833  * @param c     Character, must be a valid decimal digit.
 834  * @return      Value of the digit (0-9).
 835  */
 836 static int digit_value(char c)
 837 {
 838         return (c - '0');
 839 }