Parser/tokenizer.c

   1
   2 /* Tokenizer implementation */
   3
   4 #include "Python.h"
   5 #include "pgenheaders.h"
   6
   7 #include <ctype.h>
   8
   9 #include "tokenizer.h"
  10 #include "errcode.h"
  11
  12 extern char *PyOS_Readline(char *);
  13 /* Return malloc'ed string including trailing \n;
  14    empty malloc'ed string for EOF;
  15    NULL if interrupted */
  16
  17 /* Don't ever change this -- it would break the portability of Python code */
  18 #define TABSIZE 8
  19
  20 /* Convert a possibly signed character to a nonnegative int */
  21 /* XXX This assumes characters are 8 bits wide */
  22 #ifdef __CHAR_UNSIGNED__
  23 #define Py_CHARMASK(c)          (c)
  24 #else
  25 #define Py_CHARMASK(c)          ((c) & 0xff)
  26 #endif
  27
  28 /* Forward */
  29 static struct tok_state *tok_new(void);
  30 static int tok_nextc(struct tok_state *tok);
  31 static void tok_backup(struct tok_state *tok, int c);
  32
  33 /* Token names */
  34
  35 char *_PyParser_TokenNames[] = {
  36         "ENDMARKER",
  37         "NAME",
  38         "NUMBER",
  39         "STRING",
  40         "NEWLINE",
  41         "INDENT",
  42         "DEDENT",
  43         "LPAR",
  44         "RPAR",
  45         "LSQB",
  46         "RSQB",
  47         "COLON",
  48         "COMMA",
  49         "SEMI",
  50         "PLUS",
  51         "MINUS",
  52         "STAR",
  53         "SLASH",
  54         "VBAR",
  55         "AMPER",
  56         "LESS",
  57         "GREATER",
  58         "EQUAL",
  59         "DOT",
  60         "PERCENT",
  61         "BACKQUOTE",
  62         "LBRACE",
  63         "RBRACE",
  64         "EQEQUAL",
  65         "NOTEQUAL",
  66         "LESSEQUAL",
  67         "GREATEREQUAL",
  68         "TILDE",
  69         "CIRCUMFLEX",
  70         "LEFTSHIFT",
  71         "RIGHTSHIFT",
  72         "DOUBLESTAR",
  73         "PLUSEQUAL",
  74         "MINEQUAL",
  75         "STAREQUAL",
  76         "SLASHEQUAL",
  77         "PERCENTEQUAL",
  78         "AMPEREQUAL",
  79         "VBAREQUAL",
  80         "CIRCUMFLEXEQUAL",
  81         "LEFTSHIFTEQUAL",
  82         "RIGHTSHIFTEQUAL",
  83         "DOUBLESTAREQUAL",
  84         "DOUBLESLASH",
  85         "DOUBLESLASHEQUAL",
  86         /* This table must match the #defines in token.h! */
  87         "OP",
  88         "<ERRORTOKEN>",
  89         "<N_TOKENS>"
  90 };
  91
  92
  93 /* Create and initialize a new tok_state structure */
  94
  95 static struct tok_state *
  96 tok_new(void)
  97 {
  98         struct tok_state *tok = PyMem_NEW(struct tok_state, 1);
  99         if (tok == NULL)
 100                 return NULL;
 101         tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
 102         tok->done = E_OK;
 103         tok->fp = NULL;
 104         tok->tabsize = TABSIZE;
 105         tok->indent = 0;
 106         tok->indstack[0] = 0;
 107         tok->atbol = 1;
 108         tok->pendin = 0;
 109         tok->prompt = tok->nextprompt = NULL;
 110         tok->lineno = 0;
 111         tok->level = 0;
 112         tok->filename = NULL;
 113         tok->altwarning = 0;
 114         tok->alterror = 0;
 115         tok->alttabsize = 1;
 116         tok->altindstack[0] = 0;
 117         return tok;
 118 }
 119
 120
 121 /* Set up tokenizer for string */
 122
 123 struct tok_state *
 124 PyTokenizer_FromString(char *str)
 125 {
 126         struct tok_state *tok = tok_new();
 127         if (tok == NULL)
 128                 return NULL;
 129         tok->buf = tok->cur = tok->end = tok->inp = str;
 130         return tok;
 131 }
 132
 133
 134 /* Set up tokenizer for file */
 135
 136 struct tok_state *
 137 PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
 138 {
 139         struct tok_state *tok = tok_new();
 140         if (tok == NULL)
 141                 return NULL;
 142         if ((tok->buf = PyMem_NEW(char, BUFSIZ)) == NULL) {
 143                 PyMem_DEL(tok);
 144                 return NULL;
 145         }
 146         tok->cur = tok->inp = tok->buf;
 147         tok->end = tok->buf + BUFSIZ;
 148         tok->fp = fp;
 149         tok->prompt = ps1;
 150         tok->nextprompt = ps2;
 151         return tok;
 152 }
 153
 154
 155 /* Free a tok_state structure */
 156
 157 void
 158 PyTokenizer_Free(struct tok_state *tok)
 159 {
 160         if (tok->fp != NULL && tok->buf != NULL)
 161                 PyMem_DEL(tok->buf);
 162         PyMem_DEL(tok);
 163 }
 164
 165
 166 /* Get next char, updating state; error code goes into tok->done */
 167
 168 static int
 169 tok_nextc(register struct tok_state *tok)
 170 {
 171         for (;;) {
 172                 if (tok->cur != tok->inp) {
 173                         return Py_CHARMASK(*tok->cur++); /* Fast path */
 174                 }
 175                 if (tok->done != E_OK)
 176                         return EOF;
 177                 if (tok->fp == NULL) {
 178                         char *end = strchr(tok->inp, '\n');
 179                         if (end != NULL)
 180                                 end++;
 181                         else {
 182                                 end = strchr(tok->inp, '\0');
 183                                 if (end == tok->inp) {
 184                                         tok->done = E_EOF;
 185                                         return EOF;
 186                                 }
 187                         }
 188                         if (tok->start == NULL)
 189                                 tok->buf = tok->cur;
 190                         tok->lineno++;
 191                         tok->inp = end;
 192                         return Py_CHARMASK(*tok->cur++);
 193                 }
 194                 if (tok->prompt != NULL) {
 195                         char *new = PyOS_Readline(tok->prompt);
 196                         if (tok->nextprompt != NULL)
 197                                 tok->prompt = tok->nextprompt;
 198                         if (new == NULL)
 199                                 tok->done = E_INTR;
 200                         else if (*new == '\0') {
 201                                 PyMem_FREE(new);
 202                                 tok->done = E_EOF;
 203                         }
 204                         else if (tok->start != NULL) {
 205                                 size_t start = tok->start - tok->buf;
 206                                 size_t oldlen = tok->cur - tok->buf;
 207                                 size_t newlen = oldlen + strlen(new);
 208                                 char *buf = tok->buf;
 209                                 PyMem_RESIZE(buf, char, newlen+1);
 210                                 tok->lineno++;
 211                                 if (buf == NULL) {
 212                                         PyMem_DEL(tok->buf);
 213                                         tok->buf = NULL;
 214                                         PyMem_FREE(new);
 215                                         tok->done = E_NOMEM;
 216                                         return EOF;
 217                                 }
 218                                 tok->buf = buf;
 219                                 tok->cur = tok->buf + oldlen;
 220                                 strcpy(tok->buf + oldlen, new);
 221                                 PyMem_FREE(new);
 222                                 tok->inp = tok->buf + newlen;
 223                                 tok->end = tok->inp + 1;
 224                                 tok->start = tok->buf + start;
 225                         }
 226                         else {
 227                                 tok->lineno++;
 228                                 if (tok->buf != NULL)
 229                                         PyMem_DEL(tok->buf);
 230                                 tok->buf = new;
 231                                 tok->cur = tok->buf;
 232                                 tok->inp = strchr(tok->buf, '\0');
 233                                 tok->end = tok->inp + 1;
 234                         }
 235                 }
 236                 else {
 237                         int done = 0;
 238                         int cur = 0;
 239                         char *pt;
 240                         if (tok->start == NULL) {
 241                                 if (tok->buf == NULL) {
 242                                         tok->buf = PyMem_NEW(char, BUFSIZ);
 243                                         if (tok->buf == NULL) {
 244                                                 tok->done = E_NOMEM;
 245                                                 return EOF;
 246                                         }
 247                                         tok->end = tok->buf + BUFSIZ;
 248                                 }
 249                                 if (Py_UniversalNewlineFgets(tok->buf, (int)(tok->end - tok->buf),
 250                                           tok->fp, NULL) == NULL) {
 251                                         tok->done = E_EOF;
 252                                         done = 1;
 253                                 }
 254                                 else {
 255                                         tok->done = E_OK;
 256                                         tok->inp = strchr(tok->buf, '\0');
 257                                         done = tok->inp[-1] == '\n';
 258                                 }
 259                         }
 260                         else {
 261                                 cur = tok->cur - tok->buf;
 262                                 if (feof(tok->fp)) {
 263                                         tok->done = E_EOF;
 264                                         done = 1;
 265                                 }
 266                                 else
 267                                         tok->done = E_OK;
 268                         }
 269                         tok->lineno++;
 270                         /* Read until '\n' or EOF */
 271                         while (!done) {
 272                                 int curstart = tok->start == NULL ? -1 :
 273                                                tok->start - tok->buf;
 274                                 int curvalid = tok->inp - tok->buf;
 275                                 int newsize = curvalid + BUFSIZ;
 276                                 char *newbuf = tok->buf;
 277                                 PyMem_RESIZE(newbuf, char, newsize);
 278                                 if (newbuf == NULL) {
 279                                         tok->done = E_NOMEM;
 280                                         tok->cur = tok->inp;
 281                                         return EOF;
 282                                 }
 283                                 tok->buf = newbuf;
 284                                 tok->inp = tok->buf + curvalid;
 285                                 tok->end = tok->buf + newsize;
 286                                 tok->start = curstart < 0 ? NULL :
 287                                              tok->buf + curstart;
 288                                 if (Py_UniversalNewlineFgets(tok->inp,
 289                                                (int)(tok->end - tok->inp),
 290                                                tok->fp, NULL) == NULL) {
 291                                         /* Last line does not end in \n,
 292                                            fake one */
 293                                         strcpy(tok->inp, "\n");
 294                                 }
 295                                 tok->inp = strchr(tok->inp, '\0');
 296                                 done = tok->inp[-1] == '\n';
 297                         }
 298                         tok->cur = tok->buf + cur;
 299 #ifndef macintosh
 300                         /* replace "\r\n" with "\n" */
 301                         /* For Mac we leave the \r, giving a syntax error */
 302                         pt = tok->inp - 2;
 303                         if (pt >= tok->buf && *pt == '\r') {
 304                                 *pt++ = '\n';
 305                                 *pt = '\0';
 306                                 tok->inp = pt;
 307                         }
 308 #endif
 309                 }
 310                 if (tok->done != E_OK) {
 311                         if (tok->prompt != NULL)
 312                                 PySys_WriteStderr("\n");
 313                         tok->cur = tok->inp;
 314                         return EOF;
 315                 }
 316         }
 317         /*NOTREACHED*/
 318 }
 319
 320
 321 /* Back-up one character */
 322
 323 static void
 324 tok_backup(register struct tok_state *tok, register int c)
 325 {
 326         if (c != EOF) {
 327                 if (--tok->cur < tok->buf)
 328                         Py_FatalError("tok_backup: begin of buffer");
 329                 if (*tok->cur != c)
 330                         *tok->cur = c;
 331         }
 332 }
 333
 334
 335 /* Return the token corresponding to a single character */
 336
 337 int
 338 PyToken_OneChar(int c)
 339 {
 340         switch (c) {
 341         case '(':       return LPAR;
 342         case ')':       return RPAR;
 343         case '[':       return LSQB;
 344         case ']':       return RSQB;
 345         case ':':       return COLON;
 346         case ',':       return COMMA;
 347         case ';':       return SEMI;
 348         case '+':       return PLUS;
 349         case '-':       return MINUS;
 350         case '*':       return STAR;
 351         case '/':       return SLASH;
 352         case '|':       return VBAR;
 353         case '&':       return AMPER;
 354         case '<':       return LESS;
 355         case '>':       return GREATER;
 356         case '=':       return EQUAL;
 357         case '.':       return DOT;
 358         case '%':       return PERCENT;
 359         case '`':       return BACKQUOTE;
 360         case '{':       return LBRACE;
 361         case '}':       return RBRACE;
 362         case '^':       return CIRCUMFLEX;
 363         case '~':       return TILDE;
 364         default:        return OP;
 365         }
 366 }
 367
 368
 369 int
 370 PyToken_TwoChars(int c1, int c2)
 371 {
 372         switch (c1) {
 373         case '=':
 374                 switch (c2) {
 375                 case '=':       return EQEQUAL;
 376                 }
 377                 break;
 378         case '!':
 379                 switch (c2) {
 380                 case '=':       return NOTEQUAL;
 381                 }
 382                 break;
 383         case '<':
 384                 switch (c2) {
 385                 case '>':       return NOTEQUAL;
 386                 case '=':       return LESSEQUAL;
 387                 case '<':       return LEFTSHIFT;
 388                 }
 389                 break;
 390         case '>':
 391                 switch (c2) {
 392                 case '=':       return GREATEREQUAL;
 393                 case '>':       return RIGHTSHIFT;
 394                 }
 395                 break;
 396         case '+':
 397                 switch (c2) {
 398                 case '=':       return PLUSEQUAL;
 399                 }
 400                 break;
 401         case '-':
 402                 switch (c2) {
 403                 case '=':       return MINEQUAL;
 404                 }
 405                 break;
 406         case '*':
 407                 switch (c2) {
 408                 case '*':       return DOUBLESTAR;
 409                 case '=':       return STAREQUAL;
 410                 }
 411                 break;
 412         case '/':
 413                 switch (c2) {
 414                 case '/':       return DOUBLESLASH;
 415                 case '=':       return SLASHEQUAL;
 416                 }
 417                 break;
 418         case '|':
 419                 switch (c2) {
 420                 case '=':       return VBAREQUAL;
 421                 }
 422                 break;
 423         case '%':
 424                 switch (c2) {
 425                 case '=':       return PERCENTEQUAL;
 426                 }
 427                 break;
 428         case '&':
 429                 switch (c2) {
 430                 case '=':       return AMPEREQUAL;
 431                 }
 432                 break;
 433         case '^':
 434                 switch (c2) {
 435                 case '=':       return CIRCUMFLEXEQUAL;
 436                 }
 437                 break;
 438         }
 439         return OP;
 440 }
 441
 442 int
 443 PyToken_ThreeChars(int c1, int c2, int c3)
 444 {
 445         switch (c1) {
 446         case '<':
 447                 switch (c2) {
 448                 case '<':
 449                         switch (c3) {
 450                         case '=':
 451                                 return LEFTSHIFTEQUAL;
 452                         }
 453                         break;
 454                 }
 455                 break;
 456         case '>':
 457                 switch (c2) {
 458                 case '>':
 459                         switch (c3) {
 460                         case '=':
 461                                 return RIGHTSHIFTEQUAL;
 462                         }
 463                         break;
 464                 }
 465                 break;
 466         case '*':
 467                 switch (c2) {
 468                 case '*':
 469                         switch (c3) {
 470                         case '=':
 471                                 return DOUBLESTAREQUAL;
 472                         }
 473                         break;
 474                 }
 475                 break;
 476         case '/':
 477                 switch (c2) {
 478                 case '/':
 479                         switch (c3) {
 480                         case '=':
 481                                 return DOUBLESLASHEQUAL;
 482                         }
 483                         break;
 484                 }
 485                 break;
 486         }
 487         return OP;
 488 }
 489
 490 static int
 491 indenterror(struct tok_state *tok)
 492 {
 493         if (tok->alterror) {
 494                 tok->done = E_TABSPACE;
 495                 tok->cur = tok->inp;
 496                 return 1;
 497         }
 498         if (tok->altwarning) {
 499                 PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
 500                                   "in indentation\n", tok->filename);
 501                 tok->altwarning = 0;
 502         }
 503         return 0;
 504 }
 505
 506
 507 /* Get next token, after space stripping etc. */
 508
 509 int
 510 PyTokenizer_Get(register struct tok_state *tok, char **p_start,
 511                 char **p_end)
 512 {
 513         register int c;
 514         int blankline;
 515
 516         *p_start = *p_end = NULL;
 517   nextline:
 518         tok->start = NULL;
 519         blankline = 0;
 520
 521         /* Get indentation level */
 522         if (tok->atbol) {
 523                 register int col = 0;
 524                 register int altcol = 0;
 525                 tok->atbol = 0;
 526                 for (;;) {
 527                         c = tok_nextc(tok);
 528                         if (c == ' ')
 529                                 col++, altcol++;
 530                         else if (c == '\t') {
 531                                 col = (col/tok->tabsize + 1) * tok->tabsize;
 532                                 altcol = (altcol/tok->alttabsize + 1)
 533                                         * tok->alttabsize;
 534                         }
 535                         else if (c == '\014') /* Control-L (formfeed) */
 536                                 col = altcol = 0; /* For Emacs users */
 537                         else
 538                                 break;
 539                 }
 540                 tok_backup(tok, c);
 541                 if (c == '#' || c == '\n') {
 542                         /* Lines with only whitespace and/or comments
 543                            shouldn't affect the indentation and are
 544                            not passed to the parser as NEWLINE tokens,
 545                            except *totally* empty lines in interactive
 546                            mode, which signal the end of a command group. */
 547                         if (col == 0 && c == '\n' && tok->prompt != NULL)
 548                                 blankline = 0; /* Let it through */
 549                         else
 550                                 blankline = 1; /* Ignore completely */
 551                         /* We can't jump back right here since we still
 552                            may need to skip to the end of a comment */
 553                 }
 554                 if (!blankline && tok->level == 0) {
 555                         if (col == tok->indstack[tok->indent]) {
 556                                 /* No change */
 557                                 if (altcol != tok->altindstack[tok->indent]) {
 558                                         if (indenterror(tok))
 559                                                 return ERRORTOKEN;
 560                                 }
 561                         }
 562                         else if (col > tok->indstack[tok->indent]) {
 563                                 /* Indent -- always one */
 564                                 if (tok->indent+1 >= MAXINDENT) {
 565                                         tok->done = E_TOODEEP;
 566                                         tok->cur = tok->inp;
 567                                         return ERRORTOKEN;
 568                                 }
 569                                 if (altcol <= tok->altindstack[tok->indent]) {
 570                                         if (indenterror(tok))
 571                                                 return ERRORTOKEN;
 572                                 }
 573                                 tok->pendin++;
 574                                 tok->indstack[++tok->indent] = col;
 575                                 tok->altindstack[tok->indent] = altcol;
 576                         }
 577                         else /* col < tok->indstack[tok->indent] */ {
 578                                 /* Dedent -- any number, must be consistent */
 579                                 while (tok->indent > 0 &&
 580                                         col < tok->indstack[tok->indent]) {
 581                                         tok->pendin--;
 582                                         tok->indent--;
 583                                 }
 584                                 if (col != tok->indstack[tok->indent]) {
 585                                         tok->done = E_DEDENT;
 586                                         tok->cur = tok->inp;
 587                                         return ERRORTOKEN;
 588                                 }
 589                                 if (altcol != tok->altindstack[tok->indent]) {
 590                                         if (indenterror(tok))
 591                                                 return ERRORTOKEN;
 592                                 }
 593                         }
 594                 }
 595         }
 596
 597         tok->start = tok->cur;
 598
 599         /* Return pending indents/dedents */
 600         if (tok->pendin != 0) {
 601                 if (tok->pendin < 0) {
 602                         tok->pendin++;
 603                         return DEDENT;
 604                 }
 605                 else {
 606                         tok->pendin--;
 607                         return INDENT;
 608                 }
 609         }
 610
 611  again:
 612         tok->start = NULL;
 613         /* Skip spaces */
 614         do {
 615                 c = tok_nextc(tok);
 616         } while (c == ' ' || c == '\t' || c == '\014');
 617
 618         /* Set start of current token */
 619         tok->start = tok->cur - 1;
 620
 621         /* Skip comment, while looking for tab-setting magic */
 622         if (c == '#') {
 623                 static char *tabforms[] = {
 624                         "tab-width:",           /* Emacs */
 625                         ":tabstop=",            /* vim, full form */
 626                         ":ts=",                 /* vim, abbreviated form */
 627                         "set tabsize=",         /* will vi never die? */
 628                 /* more templates can be added here to support other editors */
 629                 };
 630                 char cbuf[80];
 631                 char *tp, **cp;
 632                 tp = cbuf;
 633                 do {
 634                         *tp++ = c = tok_nextc(tok);
 635                 } while (c != EOF && c != '\n' &&
 636                          tp - cbuf + 1 < sizeof(cbuf));
 637                 *tp = '\0';
 638                 for (cp = tabforms;
 639                      cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
 640                      cp++) {
 641                         if ((tp = strstr(cbuf, *cp))) {
 642                                 int newsize = atoi(tp + strlen(*cp));
 643
 644                                 if (newsize >= 1 && newsize <= 40) {
 645                                         tok->tabsize = newsize;
 646                                         if (Py_VerboseFlag)
 647                                             PySys_WriteStderr(
 648                                                 "Tab size set to %d\n",
 649                                                 newsize);
 650                                 }
 651                         }
 652                 }
 653                 while (c != EOF && c != '\n')
 654                         c = tok_nextc(tok);
 655         }
 656
 657         /* Check for EOF and errors now */
 658         if (c == EOF) {
 659                 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
 660         }
 661
 662         /* Identifier (most frequent token!) */
 663         if (isalpha(c) || c == '_') {
 664                 /* Process r"", u"" and ur"" */
 665                 switch (c) {
 666                 case 'r':
 667                 case 'R':
 668                         c = tok_nextc(tok);
 669                         if (c == '"' || c == '\'')
 670                                 goto letter_quote;
 671                         break;
 672                 case 'u':
 673                 case 'U':
 674                         c = tok_nextc(tok);
 675                         if (c == 'r' || c == 'R')
 676                                 c = tok_nextc(tok);
 677                         if (c == '"' || c == '\'')
 678                                 goto letter_quote;
 679                         break;
 680                 }
 681                 while (isalnum(c) || c == '_') {
 682                         c = tok_nextc(tok);
 683                 }
 684                 tok_backup(tok, c);
 685                 *p_start = tok->start;
 686                 *p_end = tok->cur;
 687                 return NAME;
 688         }
 689
 690         /* Newline */
 691         if (c == '\n') {
 692                 tok->atbol = 1;
 693                 if (blankline || tok->level > 0)
 694                         goto nextline;
 695                 *p_start = tok->start;
 696                 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
 697                 return NEWLINE;
 698         }
 699
 700 #ifdef macintosh
 701         if (c == '\r') {
 702                 PySys_WriteStderr(
 703                   "File contains \\r characters (incorrect line endings?)\n");
 704                 tok->done = E_TOKEN;
 705                 tok->cur = tok->inp;
 706                 return ERRORTOKEN;
 707         }
 708 #endif
 709         /* Period or number starting with period? */
 710         if (c == '.') {
 711                 c = tok_nextc(tok);
 712                 if (isdigit(c)) {
 713                         goto fraction;
 714                 }
 715                 else {
 716                         tok_backup(tok, c);
 717                         *p_start = tok->start;
 718                         *p_end = tok->cur;
 719                         return DOT;
 720                 }
 721         }
 722
 723         /* Number */
 724         if (isdigit(c)) {
 725                 if (c == '0') {
 726                         /* Hex or octal -- maybe. */
 727                         c = tok_nextc(tok);
 728                         if (c == '.')
 729                                 goto fraction;
 730 #ifndef WITHOUT_COMPLEX
 731                         if (c == 'j' || c == 'J')
 732                                 goto imaginary;
 733 #endif
 734                         if (c == 'x' || c == 'X') {
 735                                 /* Hex */
 736                                 do {
 737                                         c = tok_nextc(tok);
 738                                 } while (isxdigit(c));
 739                         }
 740                         else {
 741                                 int found_decimal = 0;
 742                                 /* Octal; c is first char of it */
 743                                 /* There's no 'isoctdigit' macro, sigh */
 744                                 while ('0' <= c && c < '8') {
 745                                         c = tok_nextc(tok);
 746                                 }
 747                                 if (isdigit(c)) {
 748                                         found_decimal = 1;
 749                                         do {
 750                                                 c = tok_nextc(tok);
 751                                         } while (isdigit(c));
 752                                 }
 753                                 if (c == '.')
 754                                         goto fraction;
 755                                 else if (c == 'e' || c == 'E')
 756                                         goto exponent;
 757 #ifndef WITHOUT_COMPLEX
 758                                 else if (c == 'j' || c == 'J')
 759                                         goto imaginary;
 760 #endif
 761                                 else if (found_decimal) {
 762                                         tok->done = E_TOKEN;
 763                                         tok_backup(tok, c);
 764                                         return ERRORTOKEN;
 765                                 }
 766                         }
 767                         if (c == 'l' || c == 'L')
 768                                 c = tok_nextc(tok);
 769                 }
 770                 else {
 771                         /* Decimal */
 772                         do {
 773                                 c = tok_nextc(tok);
 774                         } while (isdigit(c));
 775                         if (c == 'l' || c == 'L')
 776                                 c = tok_nextc(tok);
 777                         else {
 778                                 /* Accept floating point numbers. */
 779                                 if (c == '.') {
 780                 fraction:
 781                                         /* Fraction */
 782                                         do {
 783                                                 c = tok_nextc(tok);
 784                                         } while (isdigit(c));
 785                                 }
 786                                 if (c == 'e' || c == 'E') {
 787                 exponent:
 788                                         /* Exponent part */
 789                                         c = tok_nextc(tok);
 790                                         if (c == '+' || c == '-')
 791                                                 c = tok_nextc(tok);
 792                                         if (!isdigit(c)) {
 793                                                 tok->done = E_TOKEN;
 794                                                 tok_backup(tok, c);
 795                                                 return ERRORTOKEN;
 796                                         }
 797                                         do {
 798                                                 c = tok_nextc(tok);
 799                                         } while (isdigit(c));
 800                                 }
 801 #ifndef WITHOUT_COMPLEX
 802                                 if (c == 'j' || c == 'J')
 803                                         /* Imaginary part */
 804                 imaginary:
 805                                         c = tok_nextc(tok);
 806 #endif
 807                         }
 808                 }
 809                 tok_backup(tok, c);
 810                 *p_start = tok->start;
 811                 *p_end = tok->cur;
 812                 return NUMBER;
 813         }
 814
 815   letter_quote:
 816         /* String */
 817         if (c == '\'' || c == '"') {
 818                 int quote2 = tok->cur - tok->start + 1;
 819                 int quote = c;
 820                 int triple = 0;
 821                 int tripcount = 0;
 822                 for (;;) {
 823                         c = tok_nextc(tok);
 824                         if (c == '\n') {
 825                                 if (!triple) {
 826                                         tok->done = E_TOKEN;
 827                                         tok_backup(tok, c);
 828                                         return ERRORTOKEN;
 829                                 }
 830                                 tripcount = 0;
 831                         }
 832                         else if (c == EOF) {
 833                                 tok->done = E_TOKEN;
 834                                 tok->cur = tok->inp;
 835                                 return ERRORTOKEN;
 836                         }
 837                         else if (c == quote) {
 838                                 tripcount++;
 839                                 if (tok->cur - tok->start == quote2) {
 840                                         c = tok_nextc(tok);
 841                                         if (c == quote) {
 842                                                 triple = 1;
 843                                                 tripcount = 0;
 844                                                 continue;
 845                                         }
 846                                         tok_backup(tok, c);
 847                                 }
 848                                 if (!triple || tripcount == 3)
 849                                         break;
 850                         }
 851                         else if (c == '\\') {
 852                                 tripcount = 0;
 853                                 c = tok_nextc(tok);
 854                                 if (c == EOF) {
 855                                         tok->done = E_TOKEN;
 856                                         tok->cur = tok->inp;
 857                                         return ERRORTOKEN;
 858                                 }
 859                         }
 860                         else
 861                                 tripcount = 0;
 862                 }
 863                 *p_start = tok->start;
 864                 *p_end = tok->cur;
 865                 return STRING;
 866         }
 867
 868         /* Line continuation */
 869         if (c == '\\') {
 870                 c = tok_nextc(tok);
 871                 if (c != '\n') {
 872                         tok->done = E_TOKEN;
 873                         tok->cur = tok->inp;
 874                         return ERRORTOKEN;
 875                 }
 876                 goto again; /* Read next line */
 877         }
 878
 879         /* Check for two-character token */
 880         {
 881                 int c2 = tok_nextc(tok);
 882                 int token = PyToken_TwoChars(c, c2);
 883                 if (token != OP) {
 884                         int c3 = tok_nextc(tok);
 885                         int token3 = PyToken_ThreeChars(c, c2, c3);
 886                         if (token3 != OP) {
 887                                 token = token3;
 888                         } else {
 889                                 tok_backup(tok, c3);
 890                         }
 891                         *p_start = tok->start;
 892                         *p_end = tok->cur;
 893                         return token;
 894                 }
 895                 tok_backup(tok, c2);
 896         }
 897
 898         /* Keep track of parentheses nesting level */
 899         switch (c) {
 900         case '(':
 901         case '[':
 902         case '{':
 903                 tok->level++;
 904                 break;
 905         case ')':
 906         case ']':
 907         case '}':
 908                 tok->level--;
 909                 break;
 910         }
 911
 912         /* Punctuation character */
 913         *p_start = tok->start;
 914         *p_end = tok->cur;
 915         return PyToken_OneChar(c);
 916 }
 917
 918
 919 #ifdef Py_DEBUG
 920
 921 void
 922 tok_dump(int type, char *start, char *end)
 923 {
 924         printf("%s", _PyParser_TokenNames[type]);
 925         if (type == NAME || type == NUMBER || type == STRING || type == OP)
 926                 printf("(%.*s)", (int)(end - start), start);
 927 }
 928
 929 #endif