Parser/tokenizer.c

   1
   2 /* Tokenizer implementation */
   3
   4 #include "pgenheaders.h"
   5
   6 #include <ctype.h>
   7
   8 #include "tokenizer.h"
   9 #include "errcode.h"
  10
  11 extern char *PyOS_Readline(char *);
  12 /* Return malloc'ed string including trailing \n;
  13    empty malloc'ed string for EOF;
  14    NULL if interrupted */
  15
  16 /* Don't ever change this -- it would break the portability of Python code */
  17 #define TABSIZE 8
  18
  19 /* Convert a possibly signed character to a nonnegative int */
  20 /* XXX This assumes characters are 8 bits wide */
  21 #ifdef __CHAR_UNSIGNED__
  22 #define Py_CHARMASK(c)          (c)
  23 #else
  24 #define Py_CHARMASK(c)          ((c) & 0xff)
  25 #endif
  26
  27 /* Forward */
  28 static struct tok_state *tok_new(void);
  29 static int tok_nextc(struct tok_state *tok);
  30 static void tok_backup(struct tok_state *tok, int c);
  31
  32 /* Token names */
  33
  34 char *_PyParser_TokenNames[] = {
  35         "ENDMARKER",
  36         "NAME",
  37         "NUMBER",
  38         "STRING",
  39         "NEWLINE",
  40         "INDENT",
  41         "DEDENT",
  42         "LPAR",
  43         "RPAR",
  44         "LSQB",
  45         "RSQB",
  46         "COLON",
  47         "COMMA",
  48         "SEMI",
  49         "PLUS",
  50         "MINUS",
  51         "STAR",
  52         "SLASH",
  53         "VBAR",
  54         "AMPER",
  55         "LESS",
  56         "GREATER",
  57         "EQUAL",
  58         "DOT",
  59         "PERCENT",
  60         "BACKQUOTE",
  61         "LBRACE",
  62         "RBRACE",
  63         "EQEQUAL",
  64         "NOTEQUAL",
  65         "LESSEQUAL",
  66         "GREATEREQUAL",
  67         "TILDE",
  68         "CIRCUMFLEX",
  69         "LEFTSHIFT",
  70         "RIGHTSHIFT",
  71         "DOUBLESTAR",
  72         "PLUSEQUAL",
  73         "MINEQUAL",
  74         "STAREQUAL",
  75         "SLASHEQUAL",
  76         "PERCENTEQUAL",
  77         "AMPEREQUAL",
  78         "VBAREQUAL",
  79         "CIRCUMFLEXEQUAL",
  80         "LEFTSHIFTEQUAL",
  81         "RIGHTSHIFTEQUAL",
  82         "DOUBLESTAREQUAL",
  83         /* This table must match the #defines in token.h! */
  84         "OP",
  85         "<ERRORTOKEN>",
  86         "<N_TOKENS>"
  87 };
  88
  89
  90 /* Create and initialize a new tok_state structure */
  91
  92 static struct tok_state *
  93 tok_new(void)
  94 {
  95         struct tok_state *tok = PyMem_NEW(struct tok_state, 1);
  96         if (tok == NULL)
  97                 return NULL;
  98         tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
  99         tok->done = E_OK;
 100         tok->fp = NULL;
 101         tok->tabsize = TABSIZE;
 102         tok->indent = 0;
 103         tok->indstack[0] = 0;
 104         tok->atbol = 1;
 105         tok->pendin = 0;
 106         tok->prompt = tok->nextprompt = NULL;
 107         tok->lineno = 0;
 108         tok->level = 0;
 109         tok->filename = NULL;
 110         tok->altwarning = 0;
 111         tok->alterror = 0;
 112         tok->alttabsize = 1;
 113         tok->altindstack[0] = 0;
 114         return tok;
 115 }
 116
 117
 118 /* Set up tokenizer for string */
 119
 120 struct tok_state *
 121 PyTokenizer_FromString(char *str)
 122 {
 123         struct tok_state *tok = tok_new();
 124         if (tok == NULL)
 125                 return NULL;
 126         tok->buf = tok->cur = tok->end = tok->inp = str;
 127         return tok;
 128 }
 129
 130
 131 /* Set up tokenizer for file */
 132
 133 struct tok_state *
 134 PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
 135 {
 136         struct tok_state *tok = tok_new();
 137         if (tok == NULL)
 138                 return NULL;
 139         if ((tok->buf = PyMem_NEW(char, BUFSIZ)) == NULL) {
 140                 PyMem_DEL(tok);
 141                 return NULL;
 142         }
 143         tok->cur = tok->inp = tok->buf;
 144         tok->end = tok->buf + BUFSIZ;
 145         tok->fp = fp;
 146         tok->prompt = ps1;
 147         tok->nextprompt = ps2;
 148         return tok;
 149 }
 150
 151
 152 /* Free a tok_state structure */
 153
 154 void
 155 PyTokenizer_Free(struct tok_state *tok)
 156 {
 157         if (tok->fp != NULL && tok->buf != NULL)
 158                 PyMem_DEL(tok->buf);
 159         PyMem_DEL(tok);
 160 }
 161
 162
 163 /* Get next char, updating state; error code goes into tok->done */
 164
 165 static int
 166 tok_nextc(register struct tok_state *tok)
 167 {
 168         for (;;) {
 169                 if (tok->cur != tok->inp) {
 170                         return Py_CHARMASK(*tok->cur++); /* Fast path */
 171                 }
 172                 if (tok->done != E_OK)
 173                         return EOF;
 174                 if (tok->fp == NULL) {
 175                         char *end = strchr(tok->inp, '\n');
 176                         if (end != NULL)
 177                                 end++;
 178                         else {
 179                                 end = strchr(tok->inp, '\0');
 180                                 if (end == tok->inp) {
 181                                         tok->done = E_EOF;
 182                                         return EOF;
 183                                 }
 184                         }
 185                         if (tok->start == NULL)
 186                                 tok->buf = tok->cur;
 187                         tok->lineno++;
 188                         tok->inp = end;
 189                         return Py_CHARMASK(*tok->cur++);
 190                 }
 191                 if (tok->prompt != NULL) {
 192                         char *new = PyOS_Readline(tok->prompt);
 193                         if (tok->nextprompt != NULL)
 194                                 tok->prompt = tok->nextprompt;
 195                         if (new == NULL)
 196                                 tok->done = E_INTR;
 197                         else if (*new == '\0') {
 198                                 PyMem_FREE(new);
 199                                 tok->done = E_EOF;
 200                         }
 201                         else if (tok->start != NULL) {
 202                                 size_t start = tok->start - tok->buf;
 203                                 size_t oldlen = tok->cur - tok->buf;
 204                                 size_t newlen = oldlen + strlen(new);
 205                                 char *buf = tok->buf;
 206                                 PyMem_RESIZE(buf, char, newlen+1);
 207                                 tok->lineno++;
 208                                 if (buf == NULL) {
 209                                         PyMem_DEL(tok->buf);
 210                                         tok->buf = NULL;
 211                                         PyMem_FREE(new);
 212                                         tok->done = E_NOMEM;
 213                                         return EOF;
 214                                 }
 215                                 tok->buf = buf;
 216                                 tok->cur = tok->buf + oldlen;
 217                                 strcpy(tok->buf + oldlen, new);
 218                                 PyMem_FREE(new);
 219                                 tok->inp = tok->buf + newlen;
 220                                 tok->end = tok->inp + 1;
 221                                 tok->start = tok->buf + start;
 222                         }
 223                         else {
 224                                 tok->lineno++;
 225                                 if (tok->buf != NULL)
 226                                         PyMem_DEL(tok->buf);
 227                                 tok->buf = new;
 228                                 tok->cur = tok->buf;
 229                                 tok->inp = strchr(tok->buf, '\0');
 230                                 tok->end = tok->inp + 1;
 231                         }
 232                 }
 233                 else {
 234                         int done = 0;
 235                         int cur = 0;
 236                         char *pt;
 237                         if (tok->start == NULL) {
 238                                 if (tok->buf == NULL) {
 239                                         tok->buf = PyMem_NEW(char, BUFSIZ);
 240                                         if (tok->buf == NULL) {
 241                                                 tok->done = E_NOMEM;
 242                                                 return EOF;
 243                                         }
 244                                         tok->end = tok->buf + BUFSIZ;
 245                                 }
 246                                 if (fgets(tok->buf, (int)(tok->end - tok->buf),
 247                                           tok->fp) == NULL) {
 248                                         tok->done = E_EOF;
 249                                         done = 1;
 250                                 }
 251                                 else {
 252                                         tok->done = E_OK;
 253                                         tok->inp = strchr(tok->buf, '\0');
 254                                         done = tok->inp[-1] == '\n';
 255                                 }
 256                         }
 257                         else {
 258                                 cur = tok->cur - tok->buf;
 259                                 if (feof(tok->fp)) {
 260                                         tok->done = E_EOF;
 261                                         done = 1;
 262                                 }
 263                                 else
 264                                         tok->done = E_OK;
 265                         }
 266                         tok->lineno++;
 267                         /* Read until '\n' or EOF */
 268                         while (!done) {
 269                                 int curstart = tok->start == NULL ? -1 :
 270                                                tok->start - tok->buf;
 271                                 int curvalid = tok->inp - tok->buf;
 272                                 int newsize = curvalid + BUFSIZ;
 273                                 char *newbuf = tok->buf;
 274                                 PyMem_RESIZE(newbuf, char, newsize);
 275                                 if (newbuf == NULL) {
 276                                         tok->done = E_NOMEM;
 277                                         tok->cur = tok->inp;
 278                                         return EOF;
 279                                 }
 280                                 tok->buf = newbuf;
 281                                 tok->inp = tok->buf + curvalid;
 282                                 tok->end = tok->buf + newsize;
 283                                 tok->start = curstart < 0 ? NULL :
 284                                              tok->buf + curstart;
 285                                 if (fgets(tok->inp,
 286                                                (int)(tok->end - tok->inp),
 287                                                tok->fp) == NULL) {
 288                                         /* Last line does not end in \n,
 289                                            fake one */
 290                                         strcpy(tok->inp, "\n");
 291                                 }
 292                                 tok->inp = strchr(tok->inp, '\0');
 293                                 done = tok->inp[-1] == '\n';
 294                         }
 295                         tok->cur = tok->buf + cur;
 296 #ifndef macintosh
 297                         /* replace "\r\n" with "\n" */
 298                         /* For Mac we leave the \r, giving a syntax error */
 299                         pt = tok->inp - 2;
 300                         if (pt >= tok->buf && *pt == '\r') {
 301                                 *pt++ = '\n';
 302                                 *pt = '\0';
 303                                 tok->inp = pt;
 304                         }
 305 #endif
 306                 }
 307                 if (tok->done != E_OK) {
 308                         if (tok->prompt != NULL)
 309                                 PySys_WriteStderr("\n");
 310                         tok->cur = tok->inp;
 311                         return EOF;
 312                 }
 313         }
 314         /*NOTREACHED*/
 315 }
 316
 317
 318 /* Back-up one character */
 319
 320 static void
 321 tok_backup(register struct tok_state *tok, register int c)
 322 {
 323         if (c != EOF) {
 324                 if (--tok->cur < tok->buf)
 325                         Py_FatalError("tok_backup: begin of buffer");
 326                 if (*tok->cur != c)
 327                         *tok->cur = c;
 328         }
 329 }
 330
 331
 332 /* Return the token corresponding to a single character */
 333
 334 int
 335 PyToken_OneChar(int c)
 336 {
 337         switch (c) {
 338         case '(':       return LPAR;
 339         case ')':       return RPAR;
 340         case '[':       return LSQB;
 341         case ']':       return RSQB;
 342         case ':':       return COLON;
 343         case ',':       return COMMA;
 344         case ';':       return SEMI;
 345         case '+':       return PLUS;
 346         case '-':       return MINUS;
 347         case '*':       return STAR;
 348         case '/':       return SLASH;
 349         case '|':       return VBAR;
 350         case '&':       return AMPER;
 351         case '<':       return LESS;
 352         case '>':       return GREATER;
 353         case '=':       return EQUAL;
 354         case '.':       return DOT;
 355         case '%':       return PERCENT;
 356         case '`':       return BACKQUOTE;
 357         case '{':       return LBRACE;
 358         case '}':       return RBRACE;
 359         case '^':       return CIRCUMFLEX;
 360         case '~':       return TILDE;
 361         default:        return OP;
 362         }
 363 }
 364
 365
 366 int
 367 PyToken_TwoChars(int c1, int c2)
 368 {
 369         switch (c1) {
 370         case '=':
 371                 switch (c2) {
 372                 case '=':       return EQEQUAL;
 373                 }
 374                 break;
 375         case '!':
 376                 switch (c2) {
 377                 case '=':       return NOTEQUAL;
 378                 }
 379                 break;
 380         case '<':
 381                 switch (c2) {
 382                 case '>':       return NOTEQUAL;
 383                 case '=':       return LESSEQUAL;
 384                 case '<':       return LEFTSHIFT;
 385                 }
 386                 break;
 387         case '>':
 388                 switch (c2) {
 389                 case '=':       return GREATEREQUAL;
 390                 case '>':       return RIGHTSHIFT;
 391                 }
 392                 break;
 393         case '+':
 394                 switch (c2) {
 395                 case '=':       return PLUSEQUAL;
 396                 }
 397                 break;
 398         case '-':
 399                 switch (c2) {
 400                 case '=':       return MINEQUAL;
 401                 }
 402                 break;
 403         case '*':
 404                 switch (c2) {
 405                 case '*':       return DOUBLESTAR;
 406                 case '=':       return STAREQUAL;
 407                 }
 408                 break;
 409         case '/':
 410                 switch (c2) {
 411                 case '=':       return SLASHEQUAL;
 412                 }
 413                 break;
 414         case '|':
 415                 switch (c2) {
 416                 case '=':       return VBAREQUAL;
 417                 }
 418                 break;
 419         case '%':
 420                 switch (c2) {
 421                 case '=':       return PERCENTEQUAL;
 422                 }
 423                 break;
 424         case '&':
 425                 switch (c2) {
 426                 case '=':       return AMPEREQUAL;
 427                 }
 428                 break;
 429         case '^':
 430                 switch (c2) {
 431                 case '=':       return CIRCUMFLEXEQUAL;
 432                 }
 433                 break;
 434         }
 435         return OP;
 436 }
 437
 438 int
 439 PyToken_ThreeChars(int c1, int c2, int c3)
 440 {
 441         switch (c1) {
 442         case '<':
 443                 switch (c2) {
 444                 case '<':
 445                         switch (c3) {
 446                         case '=':
 447                                 return LEFTSHIFTEQUAL;
 448                                 break;
 449                         }
 450                         break;
 451                 }
 452                 break;
 453         case '>':
 454                 switch (c2) {
 455                 case '>':
 456                         switch (c3) {
 457                         case '=':
 458                                 return RIGHTSHIFTEQUAL;
 459                                 break;
 460                         }
 461                         break;
 462                 }
 463                 break;
 464         case '*':
 465                 switch (c2) {
 466                 case '*':
 467                         switch (c3) {
 468                         case '=':
 469                                 return DOUBLESTAREQUAL;
 470                                 break;
 471                         }
 472                         break;
 473                 }
 474                 break;
 475         }
 476         return OP;
 477 }
 478
 479 static int
 480 indenterror(struct tok_state *tok)
 481 {
 482         if (tok->alterror) {
 483                 tok->done = E_TABSPACE;
 484                 tok->cur = tok->inp;
 485                 return 1;
 486         }
 487         if (tok->altwarning) {
 488                 PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
 489                                   "in indentation\n", tok->filename);
 490                 tok->altwarning = 0;
 491         }
 492         return 0;
 493 }
 494
 495
 496 /* Get next token, after space stripping etc. */
 497
 498 int
 499 PyTokenizer_Get(register struct tok_state *tok, char **p_start,
 500                 char **p_end)
 501 {
 502         register int c;
 503         int blankline;
 504
 505         *p_start = *p_end = NULL;
 506   nextline:
 507         tok->start = NULL;
 508         blankline = 0;
 509
 510         /* Get indentation level */
 511         if (tok->atbol) {
 512                 register int col = 0;
 513                 register int altcol = 0;
 514                 tok->atbol = 0;
 515                 for (;;) {
 516                         c = tok_nextc(tok);
 517                         if (c == ' ')
 518                                 col++, altcol++;
 519                         else if (c == '\t') {
 520                                 col = (col/tok->tabsize + 1) * tok->tabsize;
 521                                 altcol = (altcol/tok->alttabsize + 1)
 522                                         * tok->alttabsize;
 523                         }
 524                         else if (c == '\014') /* Control-L (formfeed) */
 525                                 col = altcol = 0; /* For Emacs users */
 526                         else
 527                                 break;
 528                 }
 529                 tok_backup(tok, c);
 530                 if (c == '#' || c == '\n') {
 531                         /* Lines with only whitespace and/or comments
 532                            shouldn't affect the indentation and are
 533                            not passed to the parser as NEWLINE tokens,
 534                            except *totally* empty lines in interactive
 535                            mode, which signal the end of a command group. */
 536                         if (col == 0 && c == '\n' && tok->prompt != NULL)
 537                                 blankline = 0; /* Let it through */
 538                         else
 539                                 blankline = 1; /* Ignore completely */
 540                         /* We can't jump back right here since we still
 541                            may need to skip to the end of a comment */
 542                 }
 543                 if (!blankline && tok->level == 0) {
 544                         if (col == tok->indstack[tok->indent]) {
 545                                 /* No change */
 546                                 if (altcol != tok->altindstack[tok->indent]) {
 547                                         if (indenterror(tok))
 548                                                 return ERRORTOKEN;
 549                                 }
 550                         }
 551                         else if (col > tok->indstack[tok->indent]) {
 552                                 /* Indent -- always one */
 553                                 if (tok->indent+1 >= MAXINDENT) {
 554                                         tok->done = E_TOODEEP;
 555                                         tok->cur = tok->inp;
 556                                         return ERRORTOKEN;
 557                                 }
 558                                 if (altcol <= tok->altindstack[tok->indent]) {
 559                                         if (indenterror(tok))
 560                                                 return ERRORTOKEN;
 561                                 }
 562                                 tok->pendin++;
 563                                 tok->indstack[++tok->indent] = col;
 564                                 tok->altindstack[tok->indent] = altcol;
 565                         }
 566                         else /* col < tok->indstack[tok->indent] */ {
 567                                 /* Dedent -- any number, must be consistent */
 568                                 while (tok->indent > 0 &&
 569                                         col < tok->indstack[tok->indent]) {
 570                                         tok->pendin--;
 571                                         tok->indent--;
 572                                 }
 573                                 if (col != tok->indstack[tok->indent]) {
 574                                         tok->done = E_DEDENT;
 575                                         tok->cur = tok->inp;
 576                                         return ERRORTOKEN;
 577                                 }
 578                                 if (altcol != tok->altindstack[tok->indent]) {
 579                                         if (indenterror(tok))
 580                                                 return ERRORTOKEN;
 581                                 }
 582                         }
 583                 }
 584         }
 585
 586         tok->start = tok->cur;
 587
 588         /* Return pending indents/dedents */
 589         if (tok->pendin != 0) {
 590                 if (tok->pendin < 0) {
 591                         tok->pendin++;
 592                         return DEDENT;
 593                 }
 594                 else {
 595                         tok->pendin--;
 596                         return INDENT;
 597                 }
 598         }
 599
 600  again:
 601         tok->start = NULL;
 602         /* Skip spaces */
 603         do {
 604                 c = tok_nextc(tok);
 605         } while (c == ' ' || c == '\t' || c == '\014');
 606
 607         /* Set start of current token */
 608         tok->start = tok->cur - 1;
 609
 610         /* Skip comment, while looking for tab-setting magic */
 611         if (c == '#') {
 612                 static char *tabforms[] = {
 613                         "tab-width:",           /* Emacs */
 614                         ":tabstop=",            /* vim, full form */
 615                         ":ts=",                 /* vim, abbreviated form */
 616                         "set tabsize=",         /* will vi never die? */
 617                 /* more templates can be added here to support other editors */
 618                 };
 619                 char cbuf[80];
 620                 char *tp, **cp;
 621                 tp = cbuf;
 622                 do {
 623                         *tp++ = c = tok_nextc(tok);
 624                 } while (c != EOF && c != '\n' &&
 625                          tp - cbuf + 1 < sizeof(cbuf));
 626                 *tp = '\0';
 627                 for (cp = tabforms;
 628                      cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
 629                      cp++) {
 630                         if ((tp = strstr(cbuf, *cp))) {
 631                                 int newsize = atoi(tp + strlen(*cp));
 632
 633                                 if (newsize >= 1 && newsize <= 40) {
 634                                         tok->tabsize = newsize;
 635                                         if (Py_VerboseFlag)
 636                                             PySys_WriteStderr(
 637                                                 "Tab size set to %d\n",
 638                                                 newsize);
 639                                 }
 640                         }
 641                 }
 642                 while (c != EOF && c != '\n')
 643                         c = tok_nextc(tok);
 644         }
 645
 646         /* Check for EOF and errors now */
 647         if (c == EOF) {
 648                 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
 649         }
 650
 651         /* Identifier (most frequent token!) */
 652         if (isalpha(c) || c == '_') {
 653                 /* Process r"", u"" and ur"" */
 654                 switch (c) {
 655                 case 'r':
 656                 case 'R':
 657                         c = tok_nextc(tok);
 658                         if (c == '"' || c == '\'')
 659                                 goto letter_quote;
 660                         break;
 661                 case 'u':
 662                 case 'U':
 663                         c = tok_nextc(tok);
 664                         if (c == 'r' || c == 'R')
 665                                 c = tok_nextc(tok);
 666                         if (c == '"' || c == '\'')
 667                                 goto letter_quote;
 668                         break;
 669                 }
 670                 while (isalnum(c) || c == '_') {
 671                         c = tok_nextc(tok);
 672                 }
 673                 tok_backup(tok, c);
 674                 *p_start = tok->start;
 675                 *p_end = tok->cur;
 676                 return NAME;
 677         }
 678
 679         /* Newline */
 680         if (c == '\n') {
 681                 tok->atbol = 1;
 682                 if (blankline || tok->level > 0)
 683                         goto nextline;
 684                 *p_start = tok->start;
 685                 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
 686                 return NEWLINE;
 687         }
 688
 689 #ifdef macintosh
 690         if (c == '\r') {
 691                 PySys_WriteStderr(
 692                   "File contains \\r characters (incorrect line endings?)\n");
 693                 tok->done = E_TOKEN;
 694                 tok->cur = tok->inp;
 695                 return ERRORTOKEN;
 696         }
 697 #endif
 698         /* Period or number starting with period? */
 699         if (c == '.') {
 700                 c = tok_nextc(tok);
 701                 if (isdigit(c)) {
 702                         goto fraction;
 703                 }
 704                 else {
 705                         tok_backup(tok, c);
 706                         *p_start = tok->start;
 707                         *p_end = tok->cur;
 708                         return DOT;
 709                 }
 710         }
 711
 712         /* Number */
 713         if (isdigit(c)) {
 714                 if (c == '0') {
 715                         /* Hex or octal */
 716                         c = tok_nextc(tok);
 717                         if (c == '.')
 718                                 goto fraction;
 719 #ifndef WITHOUT_COMPLEX
 720                         if (c == 'j' || c == 'J')
 721                                 goto imaginary;
 722 #endif
 723                         if (c == 'x' || c == 'X') {
 724                                 /* Hex */
 725                                 do {
 726                                         c = tok_nextc(tok);
 727                                 } while (isxdigit(c));
 728                         }
 729                         else {
 730                                 /* XXX This is broken!  E.g.,
 731                                    09.9 should be accepted as float! */
 732                                 /* Octal; c is first char of it */
 733                                 /* There's no 'isoctdigit' macro, sigh */
 734                                 while ('0' <= c && c < '8') {
 735                                         c = tok_nextc(tok);
 736                                 }
 737                         }
 738                         if (c == 'l' || c == 'L')
 739                                 c = tok_nextc(tok);
 740                 }
 741                 else {
 742                         /* Decimal */
 743                         do {
 744                                 c = tok_nextc(tok);
 745                         } while (isdigit(c));
 746                         if (c == 'l' || c == 'L')
 747                                 c = tok_nextc(tok);
 748                         else {
 749                                 /* Accept floating point numbers.
 750                                    XXX This accepts incomplete things like
 751                                    XXX 12e or 1e+; worry run-time */
 752                                 if (c == '.') {
 753                 fraction:
 754                                         /* Fraction */
 755                                         do {
 756                                                 c = tok_nextc(tok);
 757                                         } while (isdigit(c));
 758                                 }
 759                                 if (c == 'e' || c == 'E') {
 760                                         /* Exponent part */
 761                                         c = tok_nextc(tok);
 762                                         if (c == '+' || c == '-')
 763                                                 c = tok_nextc(tok);
 764                                         while (isdigit(c)) {
 765                                                 c = tok_nextc(tok);
 766                                         }
 767                                 }
 768 #ifndef WITHOUT_COMPLEX
 769                                 if (c == 'j' || c == 'J')
 770                                         /* Imaginary part */
 771                 imaginary:
 772                                         c = tok_nextc(tok);
 773 #endif
 774                         }
 775                 }
 776                 tok_backup(tok, c);
 777                 *p_start = tok->start;
 778                 *p_end = tok->cur;
 779                 return NUMBER;
 780         }
 781
 782   letter_quote:
 783         /* String */
 784         if (c == '\'' || c == '"') {
 785                 int quote2 = tok->cur - tok->start + 1;
 786                 int quote = c;
 787                 int triple = 0;
 788                 int tripcount = 0;
 789                 for (;;) {
 790                         c = tok_nextc(tok);
 791                         if (c == '\n') {
 792                                 if (!triple) {
 793                                         tok->done = E_TOKEN;
 794                                         tok_backup(tok, c);
 795                                         return ERRORTOKEN;
 796                                 }
 797                                 tripcount = 0;
 798                         }
 799                         else if (c == EOF) {
 800                                 tok->done = E_TOKEN;
 801                                 tok->cur = tok->inp;
 802                                 return ERRORTOKEN;
 803                         }
 804                         else if (c == quote) {
 805                                 tripcount++;
 806                                 if (tok->cur - tok->start == quote2) {
 807                                         c = tok_nextc(tok);
 808                                         if (c == quote) {
 809                                                 triple = 1;
 810                                                 tripcount = 0;
 811                                                 continue;
 812                                         }
 813                                         tok_backup(tok, c);
 814                                 }
 815                                 if (!triple || tripcount == 3)
 816                                         break;
 817                         }
 818                         else if (c == '\\') {
 819                                 tripcount = 0;
 820                                 c = tok_nextc(tok);
 821                                 if (c == EOF) {
 822                                         tok->done = E_TOKEN;
 823                                         tok->cur = tok->inp;
 824                                         return ERRORTOKEN;
 825                                 }
 826                         }
 827                         else
 828                                 tripcount = 0;
 829                 }
 830                 *p_start = tok->start;
 831                 *p_end = tok->cur;
 832                 return STRING;
 833         }
 834
 835         /* Line continuation */
 836         if (c == '\\') {
 837                 c = tok_nextc(tok);
 838                 if (c != '\n') {
 839                         tok->done = E_TOKEN;
 840                         tok->cur = tok->inp;
 841                         return ERRORTOKEN;
 842                 }
 843                 goto again; /* Read next line */
 844         }
 845
 846         /* Check for two-character token */
 847         {
 848                 int c2 = tok_nextc(tok);
 849                 int token = PyToken_TwoChars(c, c2);
 850                 if (token != OP) {
 851                         int c3 = tok_nextc(tok);
 852                         int token3 = PyToken_ThreeChars(c, c2, c3);
 853                         if (token3 != OP) {
 854                                 token = token3;
 855                         } else {
 856                                 tok_backup(tok, c3);
 857                         }
 858                         *p_start = tok->start;
 859                         *p_end = tok->cur;
 860                         return token;
 861                 }
 862                 tok_backup(tok, c2);
 863         }
 864
 865         /* Keep track of parentheses nesting level */
 866         switch (c) {
 867         case '(':
 868         case '[':
 869         case '{':
 870                 tok->level++;
 871                 break;
 872         case ')':
 873         case ']':
 874         case '}':
 875                 tok->level--;
 876                 break;
 877         }
 878
 879         /* Punctuation character */
 880         *p_start = tok->start;
 881         *p_end = tok->cur;
 882         return PyToken_OneChar(c);
 883 }
 884
 885
 886 #ifdef Py_DEBUG
 887
 888 void
 889 tok_dump(int type, char *start, char *end)
 890 {
 891         printf("%s", _PyParser_TokenNames[type]);
 892         if (type == NAME || type == NUMBER || type == STRING || type == OP)
 893                 printf("(%.*s)", (int)(end - start), start);
 894 }
 895
 896 #endif