Parser/tokenizer.c

   1 /***********************************************************
   2 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
   3 The Netherlands.
   4
   5                         All Rights Reserved
   6
   7 Permission to use, copy, modify, and distribute this software and its
   8 documentation for any purpose and without fee is hereby granted,
   9 provided that the above copyright notice appear in all copies and that
  10 both that copyright notice and this permission notice appear in
  11 supporting documentation, and that the names of Stichting Mathematisch
  12 Centrum or CWI or Corporation for National Research Initiatives or
  13 CNRI not be used in advertising or publicity pertaining to
  14 distribution of the software without specific, written prior
  15 permission.
  16
  17 While CWI is the initial source for this software, a modified version
  18 is made available by the Corporation for National Research Initiatives
  19 (CNRI) at the Internet address ftp://ftp.python.org.
  20
  21 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
  22 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
  23 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
  24 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
  25 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
  26 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
  27 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  28 PERFORMANCE OF THIS SOFTWARE.
  29
  30 ******************************************************************/
  31
  32 /* Tokenizer implementation */
  33
  34 #include "pgenheaders.h"
  35
  36 #include <ctype.h>
  37
  38 #include "tokenizer.h"
  39 #include "errcode.h"
  40
      /* Line reader used by tok_nextc() for interactive input; it is called
         with the current prompt string.  The returned string is released
         with free() by tok_nextc(). */
   41 extern char *PyOS_Readline Py_PROTO((char *));
   42 /* Return malloc'ed string including trailing \n;
   43    empty malloc'ed string for EOF;
   44    NULL if interrupted */
   45
   46 /* Don't ever change this -- it would break the portability of Python code */
      /* Initial value of tok->tabsize (see tok_new()): a tab advances the
         indentation column to the next multiple of this value. */
   47 #define TABSIZE 8
   48
   49 /* Convert a possibly signed character to a nonnegative int */
   50 /* XXX This assumes characters are 8 bits wide */
      /* tok_nextc() applies this to every character it returns, so a
         character with the high bit set comes back as a value in 0..255. */
   51 #ifdef __CHAR_UNSIGNED__
   52 #define Py_CHARMASK(c)          (c)
   53 #else
   54 #define Py_CHARMASK(c)          ((c) & 0xff)
   55 #endif
   56
   57 /* Forward */
      /* Declarations of file-local helpers that are defined further down */
   58 static struct tok_state *tok_new Py_PROTO((void));
   59 static int tok_nextc Py_PROTO((struct tok_state *tok));
   60 static void tok_backup Py_PROTO((struct tok_state *tok, int c));
  61
  62 /* Token names */
  63
  64 char *_PyParser_TokenNames[] = {
  65         "ENDMARKER",
  66         "NAME",
  67         "NUMBER",
  68         "STRING",
  69         "NEWLINE",
  70         "INDENT",
  71         "DEDENT",
  72         "LPAR",
  73         "RPAR",
  74         "LSQB",
  75         "RSQB",
  76         "COLON",
  77         "COMMA",
  78         "SEMI",
  79         "PLUS",
  80         "MINUS",
  81         "STAR",
  82         "SLASH",
  83         "VBAR",
  84         "AMPER",
  85         "LESS",
  86         "GREATER",
  87         "EQUAL",
  88         "DOT",
  89         "PERCENT",
  90         "BACKQUOTE",
  91         "LBRACE",
  92         "RBRACE",
  93         "EQEQUAL",
  94         "NOTEQUAL",
  95         "LESSEQUAL",
  96         "GREATEREQUAL",
  97         "TILDE",
  98         "CIRCUMFLEX",
  99         "LEFTSHIFT",
 100         "RIGHTSHIFT",
 101         "DOUBLESTAR",
 102         /* This table must match the #defines in token.h! */
 103         "OP",
 104         "<ERRORTOKEN>",
 105         "<N_TOKENS>"
 106 };
 107
 108
 109 /* Create and initialize a new tok_state structure */
 110
 111 static struct tok_state *
 112 tok_new()
 113 {
 114         struct tok_state *tok = PyMem_NEW(struct tok_state, 1);
 115         if (tok == NULL)
 116                 return NULL;
 117         tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
 118         tok->done = E_OK;
 119         tok->fp = NULL;
 120         tok->tabsize = TABSIZE;
 121         tok->indent = 0;
 122         tok->indstack[0] = 0;
 123         tok->atbol = 1;
 124         tok->pendin = 0;
 125         tok->prompt = tok->nextprompt = NULL;
 126         tok->lineno = 0;
 127         tok->level = 0;
 128         tok->filename = NULL;
 129         tok->altwarning = 0;
 130         tok->alterror = 0;
 131         tok->alttabsize = 1;
 132         tok->altindstack[0] = 0;
 133         return tok;
 134 }
 135
 136
 137 /* Set up tokenizer for string */
 138
 139 struct tok_state *
 140 PyTokenizer_FromString(str)
 141         char *str;
 142 {
 143         struct tok_state *tok = tok_new();
 144         if (tok == NULL)
 145                 return NULL;
 146         tok->buf = tok->cur = tok->end = tok->inp = str;
 147         return tok;
 148 }
 149
 150
 151 /* Set up tokenizer for file */
 152
 153 struct tok_state *
 154 PyTokenizer_FromFile(fp, ps1, ps2)
 155         FILE *fp;
 156         char *ps1, *ps2;
 157 {
 158         struct tok_state *tok = tok_new();
 159         if (tok == NULL)
 160                 return NULL;
 161         if ((tok->buf = PyMem_NEW(char, BUFSIZ)) == NULL) {
 162                 PyMem_DEL(tok);
 163                 return NULL;
 164         }
 165         tok->cur = tok->inp = tok->buf;
 166         tok->end = tok->buf + BUFSIZ;
 167         tok->fp = fp;
 168         tok->prompt = ps1;
 169         tok->nextprompt = ps2;
 170         return tok;
 171 }
 172
 173
 174 /* Free a tok_state structure */
 175
 176 void
 177 PyTokenizer_Free(tok)
 178         struct tok_state *tok;
 179 {
 180         if (tok->fp != NULL && tok->buf != NULL)
 181                 PyMem_DEL(tok->buf);
 182         PyMem_DEL(tok);
 183 }
 184
 185
  186 /* Get next char, updating state; error code goes into tok->done */
      /* Returns the next input character as a nonnegative int (through
         Py_CHARMASK), or EOF.  Characters between tok->cur and tok->inp are
         already buffered and are handed out directly; when they run out,
         one more line is fetched and tok->lineno is incremented.

         There are three input sources:
           - tok->fp == NULL: a string in memory, consumed a line at a time;
           - tok->prompt != NULL: interactive input via PyOS_Readline();
           - otherwise: lines read from tok->fp with fgets().

         Once tok->done is no longer E_OK, every further call returns EOF.
         This function itself sets tok->done to E_EOF, E_INTR or E_NOMEM. */
  187
  188 static int
  189 tok_nextc(tok)
  190         register struct tok_state *tok;
  191 {
  192         for (;;) {
  193                 if (tok->cur != tok->inp) {
  194                         return Py_CHARMASK(*tok->cur++); /* Fast path */
  195                 }
                      /* Buffer exhausted; a previously recorded error or
                         end of input is sticky */
  196                 if (tok->done != E_OK)
  197                         return EOF;
                      /* --- Input from a string --- */
  198                 if (tok->fp == NULL) {
                              /* Find the end of the next line: just past the
                                 '\n', or the terminating '\0' if the last
                                 line has no newline */
  199                         char *end = strchr(tok->inp, '\n');
  200                         if (end != NULL)
  201                                 end++;
  202                         else {
  203                                 end = strchr(tok->inp, '\0');
  204                                 if (end == tok->inp) {
  205                                         tok->done = E_EOF;
  206                                         return EOF;
  207                                 }
  208                         }
                              /* No token in progress: let buf follow the
                                 start of the current line */
  209                         if (tok->start == NULL)
  210                                 tok->buf = tok->cur;
  211                         tok->lineno++;
  212                         tok->inp = end;
  213                         return Py_CHARMASK(*tok->cur++);
  214                 }
                      /* --- Interactive input --- */
  215                 if (tok->prompt != NULL) {
  216                         char *new = PyOS_Readline(tok->prompt);
                              /* After the first read, use the continuation
                                 prompt (if there is one) */
  217                         if (tok->nextprompt != NULL)
  218                                 tok->prompt = tok->nextprompt;
  219                         if (new == NULL)
  220                                 tok->done = E_INTR;
  221                         else if (*new == '\0') {
  222                                 free(new);
  223                                 tok->done = E_EOF;
  224                         }
  225                         else if (tok->start != NULL) {
                                      /* A token is in progress, so the text
                                         buffered so far must be kept: append
                                         the new line at tok->cur.  Positions
                                         are saved as offsets because realloc
                                         may move the buffer. */
  226                                 int start = tok->start - tok->buf;
  227                                 int oldlen = tok->cur - tok->buf;
  228                                 int newlen = oldlen + strlen(new);
  229                                 char *buf = realloc(tok->buf, newlen+1);
  230                                 tok->lineno++;
  231                                 if (buf == NULL) {
  232                                         free(tok->buf);
  233                                         tok->buf = NULL;
  234                                         free(new);
  235                                         tok->done = E_NOMEM;
  236                                         return EOF;
  237                                 }
  238                                 tok->buf = buf;
  239                                 tok->cur = tok->buf + oldlen;
  240                                 strcpy(tok->buf + oldlen, new);
  241                                 free(new);
  242                                 tok->inp = tok->buf + newlen;
  243                                 tok->end = tok->inp + 1;
  244                                 tok->start = tok->buf + start;
  245                         }
  246                         else {
                                      /* No token in progress: the line just
                                         read replaces the old buffer */
  247                                 tok->lineno++;
  248                                 if (tok->buf != NULL)
  249                                         free(tok->buf);
  250                                 tok->buf = new;
  251                                 tok->cur = tok->buf;
  252                                 tok->inp = strchr(tok->buf, '\0');
  253                                 tok->end = tok->inp + 1;
  254                         }
  255                 }
                      /* --- Input from a file --- */
  256                 else {
                              /* done: a complete line (or EOF) has been
                                 reached; cur: offset of tok->cur within the
                                 buffer, restored after the buffer may have
                                 been moved */
  257                         int done = 0;
  258                         int cur = 0;
  259                         char *pt;
  260                         if (tok->start == NULL) {
                                      /* No token in progress: read the new
                                         line at the start of the buffer */
  261                                 if (tok->buf == NULL) {
  262                                         tok->buf = PyMem_NEW(char, BUFSIZ);
  263                                         if (tok->buf == NULL) {
  264                                                 tok->done = E_NOMEM;
  265                                                 return EOF;
  266                                         }
  267                                         tok->end = tok->buf + BUFSIZ;
  268                                 }
  269                                 if (fgets(tok->buf, (int)(tok->end - tok->buf),
  270                                           tok->fp) == NULL) {
  271                                         tok->done = E_EOF;
  272                                         done = 1;
  273                                 }
  274                                 else {
  275                                         tok->done = E_OK;
  276                                         tok->inp = strchr(tok->buf, '\0');
  277                                         done = tok->inp[-1] == '\n';
  278                                 }
  279                         }
  280                         else {
                                      /* A token is in progress: keep the
                                         buffer contents; unless the file is
                                         at EOF, the loop below appends the
                                         next line after tok->inp */
  281                                 cur = tok->cur - tok->buf;
  282                                 if (feof(tok->fp)) {
  283                                         tok->done = E_EOF;
  284                                         done = 1;
  285                                 }
  286                                 else
  287                                         tok->done = E_OK;
  288                         }
  289                         tok->lineno++;
  290                         /* Read until '\n' or EOF */
  291                         while (!done) {
                                      /* Make room for BUFSIZ more characters
                                         after the valid data.  Positions are
                                         kept as offsets, and the resize works
                                         on a local copy of the pointer so
                                         that tok->buf is not overwritten when
                                         it fails. */
  292                                 int curstart = tok->start == NULL ? -1 :
  293                                                tok->start - tok->buf;
  294                                 int curvalid = tok->inp - tok->buf;
  295                                 int newsize = curvalid + BUFSIZ;
  296                                 char *newbuf = tok->buf;
  297                                 PyMem_RESIZE(newbuf, char, newsize);
  298                                 if (newbuf == NULL) {
  299                                         tok->done = E_NOMEM;
  300                                         tok->cur = tok->inp;
  301                                         return EOF;
  302                                 }
  303                                 tok->buf = newbuf;
  304                                 tok->inp = tok->buf + curvalid;
  305                                 tok->end = tok->buf + newsize;
  306                                 tok->start = curstart < 0 ? NULL :
  307                                              tok->buf + curstart;
  308                                 if (fgets(tok->inp,
  309                                                (int)(tok->end - tok->inp),
  310                                                tok->fp) == NULL) {
  311                                         /* Last line does not end in \n,
  312                                            fake one */
  313                                         strcpy(tok->inp, "\n");
  314                                 }
  315                                 tok->inp = strchr(tok->inp, '\0');
  316                                 done = tok->inp[-1] == '\n';
  317                         }
                              /* Re-derive the read position from its offset */
  318                         tok->cur = tok->buf + cur;
  319 #ifndef macintosh
  320                         /* replace "\r\n" with "\n" */
  321                         /* For Mac we leave the \r, giving a syntax error */
  322                         pt = tok->inp - 2;
  323                         if (pt >= tok->buf && *pt == '\r') {
  324                                 *pt++ = '\n';
  325                                 *pt = '\0';
  326                                 tok->inp = pt;
  327                         }
  328 #endif
  329                 }
                      /* The refill failed or hit end of input: in interactive
                         mode emit a newline on stderr, mark the buffer as
                         fully consumed and report EOF.  Otherwise loop back
                         to the fast path. */
  330                 if (tok->done != E_OK) {
  331                         if (tok->prompt != NULL)
  332                                 fprintf(stderr, "\n");
  333                         tok->cur = tok->inp;
  334                         return EOF;
  335                 }
  336         }
  337         /*NOTREACHED*/
  338 }
 339
 340
 341 /* Back-up one character */
 342
 343 static void
 344 tok_backup(tok, c)
 345         register struct tok_state *tok;
 346         register int c;
 347 {
 348         if (c != EOF) {
 349                 if (--tok->cur < tok->buf)
 350                         Py_FatalError("tok_backup: begin of buffer");
 351                 if (*tok->cur != c)
 352                         *tok->cur = c;
 353         }
 354 }
 355
 356
 357 /* Return the token corresponding to a single character */
 358
 359 int
 360 PyToken_OneChar(c)
 361         int c;
 362 {
 363         switch (c) {
 364         case '(':       return LPAR;
 365         case ')':       return RPAR;
 366         case '[':       return LSQB;
 367         case ']':       return RSQB;
 368         case ':':       return COLON;
 369         case ',':       return COMMA;
 370         case ';':       return SEMI;
 371         case '+':       return PLUS;
 372         case '-':       return MINUS;
 373         case '*':       return STAR;
 374         case '/':       return SLASH;
 375         case '|':       return VBAR;
 376         case '&':       return AMPER;
 377         case '<':       return LESS;
 378         case '>':       return GREATER;
 379         case '=':       return EQUAL;
 380         case '.':       return DOT;
 381         case '%':       return PERCENT;
 382         case '`':       return BACKQUOTE;
 383         case '{':       return LBRACE;
 384         case '}':       return RBRACE;
 385         case '^':       return CIRCUMFLEX;
 386         case '~':       return TILDE;
 387         default:        return OP;
 388         }
 389 }
 390
 391
 392 int
 393 PyToken_TwoChars(c1, c2)
 394         int c1, c2;
 395 {
 396         switch (c1) {
 397         case '=':
 398                 switch (c2) {
 399                 case '=':       return EQEQUAL;
 400                 }
 401                 break;
 402         case '!':
 403                 switch (c2) {
 404                 case '=':       return NOTEQUAL;
 405                 }
 406                 break;
 407         case '<':
 408                 switch (c2) {
 409                 case '>':       return NOTEQUAL;
 410                 case '=':       return LESSEQUAL;
 411                 case '<':       return LEFTSHIFT;
 412                 }
 413                 break;
 414         case '>':
 415                 switch (c2) {
 416                 case '=':       return GREATEREQUAL;
 417                 case '>':       return RIGHTSHIFT;
 418                 }
 419                 break;
 420         case '*':
 421                 switch (c2) {
 422                 case '*':       return DOUBLESTAR;
 423                 }
 424                 break;
 425         }
 426         return OP;
 427 }
 428
 429
 430 static int
 431 indenterror(tok)
 432         struct tok_state *tok;
 433 {
 434         if (tok->alterror) {
 435                 tok->done = E_INDENT;
 436                 tok->cur = tok->inp;
 437                 return 1;
 438         }
 439         if (tok->altwarning) {
 440                 fprintf(stderr, "%s: inconsistent tab/space usage\n",
 441                         tok->filename);
 442                 tok->altwarning = 0;
 443         }
 444         return 0;
 445 }
 446
 447
  448 /* Get next token, after space stripping etc. */
      /* Returns the type of the next token.  For tokens that carry text
         (NAME, NUMBER, STRING, NEWLINE and the operator/punctuation tokens)
         *p_start and *p_end are set to the start and end of that text in the
         tokenizer's buffer.  For INDENT, DEDENT, ENDMARKER and ERRORTOKEN
         both are left NULL.  When ERRORTOKEN is returned, tok->done holds
         the error code. */
  449
  450 int
  451 PyTokenizer_Get(tok, p_start, p_end)
  452         register struct tok_state *tok; /* In/out: tokenizer state */
  453         char **p_start, **p_end; /* Out: point to start/end of token */
  454 {
  455         register int c;
  456         int blankline;
  457
  458         *p_start = *p_end = NULL;
      /* Re-entered after a newline that produces no NEWLINE token (blank or
         comment-only line, or a newline inside brackets) */
  459   nextline:
  460         tok->start = NULL;
  461         blankline = 0;
  462
  463         /* Get indentation level */
  464         if (tok->atbol) {
                      /* col is the indentation measured with tok->tabsize,
                         altcol the same measured with tok->alttabsize; the
                         two are compared below to detect inconsistent use
                         of tabs and spaces */
  465                 register int col = 0;
  466                 register int altcol = 0;
  467                 tok->atbol = 0;
  468                 for (;;) {
  469                         c = tok_nextc(tok);
  470                         if (c == ' ')
  471                                 col++, altcol++;
  472                         else if (c == '\t') {
  473                                 col = (col/tok->tabsize + 1) * tok->tabsize;
  474                                 altcol = (altcol/tok->alttabsize + 1)
  475                                         * tok->alttabsize;
  476                         }
  477                         else if (c == '\014') /* Control-L (formfeed) */
  478                                 col = altcol = 0; /* For Emacs users */
  479                         else
  480                                 break;
  481                 }
  482                 tok_backup(tok, c);
  483                 if (c == '#' || c == '\n') {
  484                         /* Lines with only whitespace and/or comments
  485                            shouldn't affect the indentation and are
  486                            not passed to the parser as NEWLINE tokens,
  487                            except *totally* empty lines in interactive
  488                            mode, which signal the end of a command group. */
  489                         if (col == 0 && c == '\n' && tok->prompt != NULL)
  490                                 blankline = 0; /* Let it through */
  491                         else
  492                                 blankline = 1; /* Ignore completely */
  493                         /* We can't jump back right here since we still
  494                            may need to skip to the end of a comment */
  495                 }
                      /* Indentation is only significant outside brackets
                         (tok->level == 0) */
  496                 if (!blankline && tok->level == 0) {
  497                         if (col == tok->indstack[tok->indent]) {
  498                                 /* No change */
  499                                 if (altcol != tok->altindstack[tok->indent]) {
  500                                         if (indenterror(tok))
  501                                                 return ERRORTOKEN;
  502                                 }
  503                         }
  504                         else if (col > tok->indstack[tok->indent]) {
  505                                 /* Indent -- always one */
  506                                 if (tok->indent+1 >= MAXINDENT) {
  507                                         fprintf(stderr, "excessive indent\n");
  508                                         tok->done = E_TOKEN;
  509                                         tok->cur = tok->inp;
  510                                         return ERRORTOKEN;
  511                                 }
  512                                 if (altcol <= tok->altindstack[tok->indent]) {
  513                                         if (indenterror(tok))
  514                                                 return ERRORTOKEN;
  515                                 }
  516                                 tok->pendin++;
  517                                 tok->indstack[++tok->indent] = col;
  518                                 tok->altindstack[tok->indent] = altcol;
  519                         }
  520                         else /* col < tok->indstack[tok->indent] */ {
  521                                 /* Dedent -- any number, must be consistent */
  522                                 while (tok->indent > 0 &&
  523                                         col < tok->indstack[tok->indent]) {
  524                                         tok->pendin--;
  525                                         tok->indent--;
  526                                 }
  527                                 if (col != tok->indstack[tok->indent]) {
  528                                         fprintf(stderr,
  529                                                 "inconsistent dedent\n");
  530                                         tok->done = E_TOKEN;
  531                                         tok->cur = tok->inp;
  532                                         return ERRORTOKEN;
  533                                 }
  534                                 if (altcol != tok->altindstack[tok->indent]) {
  535                                         if (indenterror(tok))
  536                                                 return ERRORTOKEN;
  537                                 }
  538                         }
  539                 }
  540         }
  541
  542         tok->start = tok->cur;
  543
  544         /* Return pending indents/dedents */
              /* tok->pendin > 0 counts INDENT tokens still owed to the caller,
                 tok->pendin < 0 counts DEDENT tokens; one is handed out per
                 call */
  545         if (tok->pendin != 0) {
  546                 if (tok->pendin < 0) {
  547                         tok->pendin++;
  548                         return DEDENT;
  549                 }
  550                 else {
  551                         tok->pendin--;
  552                         return INDENT;
  553                 }
  554         }
  555
      /* Re-entered after a backslash-newline line continuation */
  556  again:
  557         tok->start = NULL;
  558         /* Skip spaces */
  559         do {
  560                 c = tok_nextc(tok);
  561         } while (c == ' ' || c == '\t' || c == '\014');
  562
  563         /* Set start of current token */
  564         tok->start = tok->cur - 1;
  565
  566         /* Skip comment */
  567         if (c == '#') {
  568                 /* Hack to allow overriding the tabsize in the file.
  569                    This is also recognized by vi, when it occurs near the
  570                    beginning or end of the file.  (Will vi never die...?)
  571                    For Python it must be at the beginning of the file! */
  572                 /* XXX The real vi syntax is actually different :-( */
  573                 /* XXX Should recognize Emacs syntax, too */
  574                 int x;
  575                 if (sscanf(tok->cur,
  576                                 " vi:set tabsize=%d:", &x) == 1 &&
  577                                                 x >= 1 && x <= 40) {
  578                         /* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
  579                         tok->tabsize = x;
  580                 }
                      /* Consume the rest of the comment; c ends up as the
                         terminating '\n' or EOF and is handled below */
  581                 do {
  582                         c = tok_nextc(tok);
  583                 } while (c != EOF && c != '\n');
  584         }
  585
  586         /* Check for EOF and errors now */
  587         if (c == EOF) {
  588                 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
  589         }
  590
  591         /* Identifier (most frequent token!) */
  592         if (isalpha(c) || c == '_') {
                      /* An 'r' or 'R' immediately followed by a quote is a
                         string prefix, not a name: continue at the string
                         code with c holding the quote character */
  593                 switch (c) {
  594                 case 'r':
  595                 case 'R':
  596                         c = tok_nextc(tok);
  597                         if (c == '"' || c == '\'')
  598                                 goto letter_quote;
  599                 }
  600                 while (isalnum(c) || c == '_') {
  601                         c = tok_nextc(tok);
  602                 }
  603                 tok_backup(tok, c);
  604                 *p_start = tok->start;
  605                 *p_end = tok->cur;
  606                 return NAME;
  607         }
  608
  609         /* Newline */
  610         if (c == '\n') {
  611                 tok->atbol = 1;
                      /* No NEWLINE token for ignored lines or for newlines
                         inside brackets */
  612                 if (blankline || tok->level > 0)
  613                         goto nextline;
  614                 *p_start = tok->start;
  615                 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
  616                 return NEWLINE;
  617         }
  618
  619 #ifdef macintosh
  620         if (c == '\r') {
  621                 fprintf(stderr,
  622                   "File contains \\r characters (incorrect line endings?)\n");
  623                 tok->done = E_TOKEN;
  624                 tok->cur = tok->inp;
  625                 return ERRORTOKEN;
  626         }
  627 #endif
  628         /* Period or number starting with period? */
  629         if (c == '.') {
  630                 c = tok_nextc(tok);
  631                 if (isdigit(c)) {
  632                         goto fraction;
  633                 }
  634                 else {
  635                         tok_backup(tok, c);
  636                         *p_start = tok->start;
  637                         *p_end = tok->cur;
  638                         return DOT;
  639                 }
  640         }
  641
  642         /* Number */
  643         if (isdigit(c)) {
  644                 if (c == '0') {
  645                         /* Hex or octal */
  646                         c = tok_nextc(tok);
  647                         if (c == '.')
  648                                 goto fraction;
  649 #ifndef WITHOUT_COMPLEX
  650                         if (c == 'j' || c == 'J')
  651                                 goto imaginary;
  652 #endif
  653                         if (c == 'x' || c == 'X') {
  654                                 /* Hex */
  655                                 do {
  656                                         c = tok_nextc(tok);
  657                                 } while (isxdigit(c));
  658                         }
  659                         else {
  660                                 /* XXX This is broken!  E.g.,
  661                                    09.9 should be accepted as float! */
  662                                 /* Octal; c is first char of it */
  663                                 /* There's no 'isoctdigit' macro, sigh */
  664                                 while ('0' <= c && c < '8') {
  665                                         c = tok_nextc(tok);
  666                                 }
  667                         }
                              /* Optional 'l'/'L' suffix */
  668                         if (c == 'l' || c == 'L')
  669                                 c = tok_nextc(tok);
  670                 }
  671                 else {
  672                         /* Decimal */
  673                         do {
  674                                 c = tok_nextc(tok);
  675                         } while (isdigit(c));
  676                         if (c == 'l' || c == 'L')
  677                                 c = tok_nextc(tok);
  678                         else {
  679                                 /* Accept floating point numbers.
  680                                    XXX This accepts incomplete things like
  681                                    XXX 12e or 1e+; worry run-time */
  682                                 if (c == '.') {
                      /* Also entered by goto from the leading-'.' and "0."
                         cases above */
  683                 fraction:
  684                                         /* Fraction */
  685                                         do {
  686                                                 c = tok_nextc(tok);
  687                                         } while (isdigit(c));
  688                                 }
  689                                 if (c == 'e' || c == 'E') {
  690                                         /* Exponent part */
  691                                         c = tok_nextc(tok);
  692                                         if (c == '+' || c == '-')
  693                                                 c = tok_nextc(tok);
  694                                         while (isdigit(c)) {
  695                                                 c = tok_nextc(tok);
  696                                         }
  697                                 }
  698 #ifndef WITHOUT_COMPLEX
  699                                 if (c == 'j' || c == 'J')
  700                                         /* Imaginary part */
                      /* Also entered by goto from the "0j" case above */
  701                 imaginary:
  702                                         c = tok_nextc(tok);
  703 #endif
  704                         }
  705                 }
  706                 tok_backup(tok, c);
  707                 *p_start = tok->start;
  708                 *p_end = tok->cur;
  709                 return NUMBER;
  710         }
  711
      /* Also entered by goto from the identifier code after an 'r'/'R' prefix */
  712   letter_quote:
  713         /* String */
  714         if (c == '\'' || c == '"') {
                      /* quote2: value of (tok->cur - tok->start) right after
                         reading a character that directly follows the
                         opening quote; used to recognize "" and the opening
                         of a triple-quoted string.
                         triple: nonzero once an opening triple quote is seen.
                         tripcount: number of consecutive quote characters
                         seen; three of them close a triple-quoted string. */
  715                 int quote2 = tok->cur - tok->start + 1;
  716                 int quote = c;
  717                 int triple = 0;
  718                 int tripcount = 0;
  719                 for (;;) {
  720                         c = tok_nextc(tok);
  721                         if (c == '\n') {
                                      /* A newline is only allowed inside a
                                         triple-quoted string */
  722                                 if (!triple) {
  723                                         tok->done = E_TOKEN;
  724                                         tok_backup(tok, c);
  725                                         return ERRORTOKEN;
  726                                 }
  727                                 tripcount = 0;
  728                         }
  729                         else if (c == EOF) {
  730                                 tok->done = E_TOKEN;
  731                                 tok->cur = tok->inp;
  732                                 return ERRORTOKEN;
  733                         }
  734                         else if (c == quote) {
  735                                 tripcount++;
  736                                 if (tok->cur - tok->start == quote2) {
                                              /* Second quote right after the
                                                 opening one: a third quote
                                                 opens a triple-quoted string,
                                                 anything else means the
                                                 string is empty */
  737                                         c = tok_nextc(tok);
  738                                         if (c == quote) {
  739                                                 triple = 1;
  740                                                 tripcount = 0;
  741                                                 continue;
  742                                         }
  743                                         tok_backup(tok, c);
  744                                 }
  745                                 if (!triple || tripcount == 3)
  746                                         break;
  747                         }
  748                         else if (c == '\\') {
                                      /* Skip the character after a backslash
                                         so that an escaped quote does not end
                                         the string */
  749                                 tripcount = 0;
  750                                 c = tok_nextc(tok);
  751                                 if (c == EOF) {
  752                                         tok->done = E_TOKEN;
  753                                         tok->cur = tok->inp;
  754                                         return ERRORTOKEN;
  755                                 }
  756                         }
  757                         else
  758                                 tripcount = 0;
  759                 }
  760                 *p_start = tok->start;
  761                 *p_end = tok->cur;
  762                 return STRING;
  763         }
  764
  765         /* Line continuation */
  766         if (c == '\\') {
  767                 c = tok_nextc(tok);
  768                 if (c != '\n') {
  769                         tok->done = E_TOKEN;
  770                         tok->cur = tok->inp;
  771                         return ERRORTOKEN;
  772                 }
  773                 goto again; /* Read next line */
  774         }
  775
  776         /* Check for two-character token */
  777         {
  778                 int c2 = tok_nextc(tok);
  779                 int token = PyToken_TwoChars(c, c2);
  780                 if (token != OP) {
  781                         *p_start = tok->start;
  782                         *p_end = tok->cur;
  783                         return token;
  784                 }
                      /* Not a two-character operator: give c2 back */
  785                 tok_backup(tok, c2);
  786         }
  787
  788         /* Keep track of parentheses nesting level */
              /* tok->level > 0 suppresses NEWLINE tokens and indentation
                 processing above */
  789         switch (c) {
  790         case '(':
  791         case '[':
  792         case '{':
  793                 tok->level++;
  794                 break;
  795         case ')':
  796         case ']':
  797         case '}':
  798                 tok->level--;
  799                 break;
  800         }
  801
  802         /* Punctuation character */
  803         *p_start = tok->start;
  804         *p_end = tok->cur;
  805         return PyToken_OneChar(c);
  806 }
 807
 808
 809 #ifdef Py_DEBUG
 810
 811 void
 812 tok_dump(type, start, end)
 813         int type;
 814         char *start, *end;
 815 {
 816         printf("%s", _PyParser_TokenNames[type]);
 817         if (type == NAME || type == NUMBER || type == STRING || type == OP)
 818                 printf("(%.*s)", (int)(end - start), start);
 819 }
 820
 821 #endif