Parser/tokenizer.c

   1 /***********************************************************
   2 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
   3 The Netherlands.
   4
   5                         All Rights Reserved
   6
   7 Permission to use, copy, modify, and distribute this software and its
   8 documentation for any purpose and without fee is hereby granted,
   9 provided that the above copyright notice appear in all copies and that
  10 both that copyright notice and this permission notice appear in
  11 supporting documentation, and that the names of Stichting Mathematisch
  12 Centrum or CWI or Corporation for National Research Initiatives or
  13 CNRI not be used in advertising or publicity pertaining to
  14 distribution of the software without specific, written prior
  15 permission.
  16
  17 While CWI is the initial source for this software, a modified version
  18 is made available by the Corporation for National Research Initiatives
  19 (CNRI) at the Internet address ftp://ftp.python.org.
  20
  21 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
  22 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
  23 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
  24 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
  25 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
  26 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
  27 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  28 PERFORMANCE OF THIS SOFTWARE.
  29
  30 ******************************************************************/
  31
  32 /* Tokenizer implementation */
  33
  34 #include "pgenheaders.h"
  35
  36 #include <ctype.h>
  37
  38 #include "tokenizer.h"
  39 #include "errcode.h"
  40
  41 extern char *PyOS_Readline Py_PROTO((char *));
  42 /* Return malloc'ed string including trailing \n;
  43    empty malloc'ed string for EOF;
  44    NULL if interrupted */
  45
  46 /* Don't ever change this -- it would break the portability of Python code */
  47 #define TABSIZE 8
  48
/* Convert a possibly signed character to a nonnegative int.
   When __CHAR_UNSIGNED__ is defined the argument is passed through
   unchanged; otherwise it is masked with 0xff so that only the low
   eight bits survive. */
/* XXX This assumes characters are 8 bits wide */
#ifdef __CHAR_UNSIGNED__
#define Py_CHARMASK(c)          (c)
#else
#define Py_CHARMASK(c)          ((c) & 0xff)
#endif
  56
  57 /* Forward */
  58 static struct tok_state *tok_new Py_PROTO((void));
  59 static int tok_nextc Py_PROTO((struct tok_state *tok));
  60 static void tok_backup Py_PROTO((struct tok_state *tok, int c));
  61
  62 /* Token names */
  63
  64 char *_PyParser_TokenNames[] = {
  65         "ENDMARKER",
  66         "NAME",
  67         "NUMBER",
  68         "STRING",
  69         "NEWLINE",
  70         "INDENT",
  71         "DEDENT",
  72         "LPAR",
  73         "RPAR",
  74         "LSQB",
  75         "RSQB",
  76         "COLON",
  77         "COMMA",
  78         "SEMI",
  79         "PLUS",
  80         "MINUS",
  81         "STAR",
  82         "SLASH",
  83         "VBAR",
  84         "AMPER",
  85         "LESS",
  86         "GREATER",
  87         "EQUAL",
  88         "DOT",
  89         "PERCENT",
  90         "BACKQUOTE",
  91         "LBRACE",
  92         "RBRACE",
  93         "EQEQUAL",
  94         "NOTEQUAL",
  95         "LESSEQUAL",
  96         "GREATEREQUAL",
  97         "TILDE",
  98         "CIRCUMFLEX",
  99         "LEFTSHIFT",
 100         "RIGHTSHIFT",
 101         "DOUBLESTAR",
 102         /* This table must match the #defines in token.h! */
 103         "OP",
 104         "<ERRORTOKEN>",
 105         "<N_TOKENS>"
 106 };
 107
 108
 109 /* Create and initialize a new tok_state structure */
 110
 111 static struct tok_state *
 112 tok_new()
 113 {
 114         struct tok_state *tok = PyMem_NEW(struct tok_state, 1);
 115         if (tok == NULL)
 116                 return NULL;
 117         tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
 118         tok->done = E_OK;
 119         tok->fp = NULL;
 120         tok->tabsize = TABSIZE;
 121         tok->indent = 0;
 122         tok->indstack[0] = 0;
 123         tok->atbol = 1;
 124         tok->pendin = 0;
 125         tok->prompt = tok->nextprompt = NULL;
 126         tok->lineno = 0;
 127         tok->level = 0;
 128         tok->filename = NULL;
 129         tok->altwarning = 0;
 130         tok->alterror = 0;
 131         tok->alttabsize = 1;
 132         tok->altindstack[0] = 0;
 133         return tok;
 134 }
 135
 136
 137 /* Set up tokenizer for string */
 138
 139 struct tok_state *
 140 PyTokenizer_FromString(str)
 141         char *str;
 142 {
 143         struct tok_state *tok = tok_new();
 144         if (tok == NULL)
 145                 return NULL;
 146         tok->buf = tok->cur = tok->end = tok->inp = str;
 147         return tok;
 148 }
 149
 150
 151 /* Set up tokenizer for file */
 152
 153 struct tok_state *
 154 PyTokenizer_FromFile(fp, ps1, ps2)
 155         FILE *fp;
 156         char *ps1, *ps2;
 157 {
 158         struct tok_state *tok = tok_new();
 159         if (tok == NULL)
 160                 return NULL;
 161         if ((tok->buf = PyMem_NEW(char, BUFSIZ)) == NULL) {
 162                 PyMem_DEL(tok);
 163                 return NULL;
 164         }
 165         tok->cur = tok->inp = tok->buf;
 166         tok->end = tok->buf + BUFSIZ;
 167         tok->fp = fp;
 168         tok->prompt = ps1;
 169         tok->nextprompt = ps2;
 170         return tok;
 171 }
 172
 173
 174 /* Free a tok_state structure */
 175
 176 void
 177 PyTokenizer_Free(tok)
 178         struct tok_state *tok;
 179 {
 180         if (tok->fp != NULL && tok->buf != NULL)
 181                 PyMem_DEL(tok->buf);
 182         PyMem_DEL(tok);
 183 }
 184
 185
 186 /* Get next char, updating state; error code goes into tok->done */
 187
 188 static int
 189 tok_nextc(tok)
 190         register struct tok_state *tok;
 191 {
 192         for (;;) {
 193                 if (tok->cur != tok->inp) {
 194                         return Py_CHARMASK(*tok->cur++); /* Fast path */
 195                 }
 196                 if (tok->done != E_OK)
 197                         return EOF;
 198                 if (tok->fp == NULL) {
 199                         char *end = strchr(tok->inp, '\n');
 200                         if (end != NULL)
 201                                 end++;
 202                         else {
 203                                 end = strchr(tok->inp, '\0');
 204                                 if (end == tok->inp) {
 205                                         tok->done = E_EOF;
 206                                         return EOF;
 207                                 }
 208                         }
 209                         if (tok->start == NULL)
 210                                 tok->buf = tok->cur;
 211                         tok->lineno++;
 212                         tok->inp = end;
 213                         return Py_CHARMASK(*tok->cur++);
 214                 }
 215                 if (tok->prompt != NULL) {
 216                         char *new = PyOS_Readline(tok->prompt);
 217                         if (tok->nextprompt != NULL)
 218                                 tok->prompt = tok->nextprompt;
 219                         if (new == NULL)
 220                                 tok->done = E_INTR;
 221                         else if (*new == '\0') {
 222                                 free(new);
 223                                 tok->done = E_EOF;
 224                         }
 225                         else if (tok->start != NULL) {
 226                                 int start = tok->start - tok->buf;
 227                                 int oldlen = tok->cur - tok->buf;
 228                                 int newlen = oldlen + strlen(new);
 229                                 char *buf = realloc(tok->buf, newlen+1);
 230                                 tok->lineno++;
 231                                 if (buf == NULL) {
 232                                         free(tok->buf);
 233                                         tok->buf = NULL;
 234                                         free(new);
 235                                         tok->done = E_NOMEM;
 236                                         return EOF;
 237                                 }
 238                                 tok->buf = buf;
 239                                 tok->cur = tok->buf + oldlen;
 240                                 strcpy(tok->buf + oldlen, new);
 241                                 free(new);
 242                                 tok->inp = tok->buf + newlen;
 243                                 tok->end = tok->inp + 1;
 244                                 tok->start = tok->buf + start;
 245                         }
 246                         else {
 247                                 tok->lineno++;
 248                                 if (tok->buf != NULL)
 249                                         free(tok->buf);
 250                                 tok->buf = new;
 251                                 tok->cur = tok->buf;
 252                                 tok->inp = strchr(tok->buf, '\0');
 253                                 tok->end = tok->inp + 1;
 254                         }
 255                 }
 256                 else {
 257                         int done = 0;
 258                         int cur = 0;
 259                         char *pt;
 260                         if (tok->start == NULL) {
 261                                 if (tok->buf == NULL) {
 262                                         tok->buf = PyMem_NEW(char, BUFSIZ);
 263                                         if (tok->buf == NULL) {
 264                                                 tok->done = E_NOMEM;
 265                                                 return EOF;
 266                                         }
 267                                         tok->end = tok->buf + BUFSIZ;
 268                                 }
 269                                 if (fgets(tok->buf, (int)(tok->end - tok->buf),
 270                                           tok->fp) == NULL) {
 271                                         tok->done = E_EOF;
 272                                         done = 1;
 273                                 }
 274                                 else {
 275                                         tok->done = E_OK;
 276                                         tok->inp = strchr(tok->buf, '\0');
 277                                         done = tok->inp[-1] == '\n';
 278                                 }
 279                         }
 280                         else {
 281                                 cur = tok->cur - tok->buf;
 282                                 if (feof(tok->fp)) {
 283                                         tok->done = E_EOF;
 284                                         done = 1;
 285                                 }
 286                                 else
 287                                         tok->done = E_OK;
 288                         }
 289                         tok->lineno++;
 290                         /* Read until '\n' or EOF */
 291                         while (!done) {
 292                                 int curstart = tok->start == NULL ? -1 :
 293                                                tok->start - tok->buf;
 294                                 int curvalid = tok->inp - tok->buf;
 295                                 int newsize = curvalid + BUFSIZ;
 296                                 char *newbuf = tok->buf;
 297                                 PyMem_RESIZE(newbuf, char, newsize);
 298                                 if (newbuf == NULL) {
 299                                         tok->done = E_NOMEM;
 300                                         tok->cur = tok->inp;
 301                                         return EOF;
 302                                 }
 303                                 tok->buf = newbuf;
 304                                 tok->inp = tok->buf + curvalid;
 305                                 tok->end = tok->buf + newsize;
 306                                 tok->start = curstart < 0 ? NULL :
 307                                              tok->buf + curstart;
 308                                 if (fgets(tok->inp,
 309                                                (int)(tok->end - tok->inp),
 310                                                tok->fp) == NULL) {
 311                                         /* Last line does not end in \n,
 312                                            fake one */
 313                                         strcpy(tok->inp, "\n");
 314                                 }
 315                                 tok->inp = strchr(tok->inp, '\0');
 316                                 done = tok->inp[-1] == '\n';
 317                         }
 318                         tok->cur = tok->buf + cur;
 319 #ifndef macintosh
 320                         /* replace "\r\n" with "\n" */
 321                         /* For Mac we leave the \r, giving a syntax error */
 322                         pt = tok->inp - 2;
 323                         if (pt >= tok->buf && *pt == '\r') {
 324                                 *pt++ = '\n';
 325                                 *pt = '\0';
 326                                 tok->inp = pt;
 327                         }
 328 #endif
 329                 }
 330                 if (tok->done != E_OK) {
 331                         if (tok->prompt != NULL)
 332                                 PySys_WriteStderr("\n");
 333                         tok->cur = tok->inp;
 334                         return EOF;
 335                 }
 336         }
 337         /*NOTREACHED*/
 338 }
 339
 340
 341 /* Back-up one character */
 342
 343 static void
 344 tok_backup(tok, c)
 345         register struct tok_state *tok;
 346         register int c;
 347 {
 348         if (c != EOF) {
 349                 if (--tok->cur < tok->buf)
 350                         Py_FatalError("tok_backup: begin of buffer");
 351                 if (*tok->cur != c)
 352                         *tok->cur = c;
 353         }
 354 }
 355
 356
 357 /* Return the token corresponding to a single character */
 358
 359 int
 360 PyToken_OneChar(c)
 361         int c;
 362 {
 363         switch (c) {
 364         case '(':       return LPAR;
 365         case ')':       return RPAR;
 366         case '[':       return LSQB;
 367         case ']':       return RSQB;
 368         case ':':       return COLON;
 369         case ',':       return COMMA;
 370         case ';':       return SEMI;
 371         case '+':       return PLUS;
 372         case '-':       return MINUS;
 373         case '*':       return STAR;
 374         case '/':       return SLASH;
 375         case '|':       return VBAR;
 376         case '&':       return AMPER;
 377         case '<':       return LESS;
 378         case '>':       return GREATER;
 379         case '=':       return EQUAL;
 380         case '.':       return DOT;
 381         case '%':       return PERCENT;
 382         case '`':       return BACKQUOTE;
 383         case '{':       return LBRACE;
 384         case '}':       return RBRACE;
 385         case '^':       return CIRCUMFLEX;
 386         case '~':       return TILDE;
 387         default:        return OP;
 388         }
 389 }
 390
 391
 392 int
 393 PyToken_TwoChars(c1, c2)
 394         int c1, c2;
 395 {
 396         switch (c1) {
 397         case '=':
 398                 switch (c2) {
 399                 case '=':       return EQEQUAL;
 400                 }
 401                 break;
 402         case '!':
 403                 switch (c2) {
 404                 case '=':       return NOTEQUAL;
 405                 }
 406                 break;
 407         case '<':
 408                 switch (c2) {
 409                 case '>':       return NOTEQUAL;
 410                 case '=':       return LESSEQUAL;
 411                 case '<':       return LEFTSHIFT;
 412                 }
 413                 break;
 414         case '>':
 415                 switch (c2) {
 416                 case '=':       return GREATEREQUAL;
 417                 case '>':       return RIGHTSHIFT;
 418                 }
 419                 break;
 420         case '*':
 421                 switch (c2) {
 422                 case '*':       return DOUBLESTAR;
 423                 }
 424                 break;
 425         }
 426         return OP;
 427 }
 428
 429
 430 static int
 431 indenterror(tok)
 432         struct tok_state *tok;
 433 {
 434         if (tok->alterror) {
 435                 tok->done = E_INDENT;
 436                 tok->cur = tok->inp;
 437                 return 1;
 438         }
 439         if (tok->altwarning) {
 440                 PySys_WriteStderr("%s: inconsistent tab/space usage\n",
 441                         tok->filename);
 442                 tok->altwarning = 0;
 443         }
 444         return 0;
 445 }
 446
 447
 448 /* Get next token, after space stripping etc. */
 449
 450 int
 451 PyTokenizer_Get(tok, p_start, p_end)
 452         register struct tok_state *tok; /* In/out: tokenizer state */
 453         char **p_start, **p_end; /* Out: point to start/end of token */
 454 {
 455         register int c;
 456         int blankline;
 457
 458         *p_start = *p_end = NULL;
 459   nextline:
 460         tok->start = NULL;
 461         blankline = 0;
 462
 463         /* Get indentation level */
 464         if (tok->atbol) {
 465                 register int col = 0;
 466                 register int altcol = 0;
 467                 tok->atbol = 0;
 468                 for (;;) {
 469                         c = tok_nextc(tok);
 470                         if (c == ' ')
 471                                 col++, altcol++;
 472                         else if (c == '\t') {
 473                                 col = (col/tok->tabsize + 1) * tok->tabsize;
 474                                 altcol = (altcol/tok->alttabsize + 1)
 475                                         * tok->alttabsize;
 476                         }
 477                         else if (c == '\014') /* Control-L (formfeed) */
 478                                 col = altcol = 0; /* For Emacs users */
 479                         else
 480                                 break;
 481                 }
 482                 tok_backup(tok, c);
 483                 if (c == '#' || c == '\n') {
 484                         /* Lines with only whitespace and/or comments
 485                            shouldn't affect the indentation and are
 486                            not passed to the parser as NEWLINE tokens,
 487                            except *totally* empty lines in interactive
 488                            mode, which signal the end of a command group. */
 489                         if (col == 0 && c == '\n' && tok->prompt != NULL)
 490                                 blankline = 0; /* Let it through */
 491                         else
 492                                 blankline = 1; /* Ignore completely */
 493                         /* We can't jump back right here since we still
 494                            may need to skip to the end of a comment */
 495                 }
 496                 if (!blankline && tok->level == 0) {
 497                         if (col == tok->indstack[tok->indent]) {
 498                                 /* No change */
 499                                 if (altcol != tok->altindstack[tok->indent]) {
 500                                         if (indenterror(tok))
 501                                                 return ERRORTOKEN;
 502                                 }
 503                         }
 504                         else if (col > tok->indstack[tok->indent]) {
 505                                 /* Indent -- always one */
 506                                 if (tok->indent+1 >= MAXINDENT) {
 507                                         PySys_WriteStderr(
 508                                                 "excessive indent\n");
 509                                         tok->done = E_TOKEN;
 510                                         tok->cur = tok->inp;
 511                                         return ERRORTOKEN;
 512                                 }
 513                                 if (altcol <= tok->altindstack[tok->indent]) {
 514                                         if (indenterror(tok))
 515                                                 return ERRORTOKEN;
 516                                 }
 517                                 tok->pendin++;
 518                                 tok->indstack[++tok->indent] = col;
 519                                 tok->altindstack[tok->indent] = altcol;
 520                         }
 521                         else /* col < tok->indstack[tok->indent] */ {
 522                                 /* Dedent -- any number, must be consistent */
 523                                 while (tok->indent > 0 &&
 524                                         col < tok->indstack[tok->indent]) {
 525                                         tok->pendin--;
 526                                         tok->indent--;
 527                                 }
 528                                 if (col != tok->indstack[tok->indent]) {
 529                                         fprintf(stderr,
 530                                                 "inconsistent dedent\n");
 531                                         tok->done = E_TOKEN;
 532                                         tok->cur = tok->inp;
 533                                         return ERRORTOKEN;
 534                                 }
 535                                 if (altcol != tok->altindstack[tok->indent]) {
 536                                         if (indenterror(tok))
 537                                                 return ERRORTOKEN;
 538                                 }
 539                         }
 540                 }
 541         }
 542
 543         tok->start = tok->cur;
 544
 545         /* Return pending indents/dedents */
 546         if (tok->pendin != 0) {
 547                 if (tok->pendin < 0) {
 548                         tok->pendin++;
 549                         return DEDENT;
 550                 }
 551                 else {
 552                         tok->pendin--;
 553                         return INDENT;
 554                 }
 555         }
 556
 557  again:
 558         tok->start = NULL;
 559         /* Skip spaces */
 560         do {
 561                 c = tok_nextc(tok);
 562         } while (c == ' ' || c == '\t' || c == '\014');
 563
 564         /* Set start of current token */
 565         tok->start = tok->cur - 1;
 566
 567         /* Skip comment */
 568         if (c == '#') {
 569                 /* Hack to allow overriding the tabsize in the file.
 570                    This is also recognized by vi, when it occurs near the
 571                    beginning or end of the file.  (Will vi never die...?)
 572                    For Python it must be at the beginning of the file! */
 573                 /* XXX The real vi syntax is actually different :-( */
 574                 /* XXX Should recognize Emacs syntax, too */
 575                 int x;
 576                 if (sscanf(tok->cur,
 577                                 " vi:set tabsize=%d:", &x) == 1 &&
 578                                                 x >= 1 && x <= 40) {
 579                         /* PySys_WriteStderr("# vi:set tabsize=%d:\n", x); */
 580                         tok->tabsize = x;
 581                 }
 582                 do {
 583                         c = tok_nextc(tok);
 584                 } while (c != EOF && c != '\n');
 585         }
 586
 587         /* Check for EOF and errors now */
 588         if (c == EOF) {
 589                 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
 590         }
 591
 592         /* Identifier (most frequent token!) */
 593         if (isalpha(c) || c == '_') {
 594                 switch (c) {
 595                 case 'r':
 596                 case 'R':
 597                         c = tok_nextc(tok);
 598                         if (c == '"' || c == '\'')
 599                                 goto letter_quote;
 600                 }
 601                 while (isalnum(c) || c == '_') {
 602                         c = tok_nextc(tok);
 603                 }
 604                 tok_backup(tok, c);
 605                 *p_start = tok->start;
 606                 *p_end = tok->cur;
 607                 return NAME;
 608         }
 609
 610         /* Newline */
 611         if (c == '\n') {
 612                 tok->atbol = 1;
 613                 if (blankline || tok->level > 0)
 614                         goto nextline;
 615                 *p_start = tok->start;
 616                 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
 617                 return NEWLINE;
 618         }
 619
 620 #ifdef macintosh
 621         if (c == '\r') {
 622                 PySys_WriteStderr(
 623                   "File contains \\r characters (incorrect line endings?)\n");
 624                 tok->done = E_TOKEN;
 625                 tok->cur = tok->inp;
 626                 return ERRORTOKEN;
 627         }
 628 #endif
 629         /* Period or number starting with period? */
 630         if (c == '.') {
 631                 c = tok_nextc(tok);
 632                 if (isdigit(c)) {
 633                         goto fraction;
 634                 }
 635                 else {
 636                         tok_backup(tok, c);
 637                         *p_start = tok->start;
 638                         *p_end = tok->cur;
 639                         return DOT;
 640                 }
 641         }
 642
 643         /* Number */
 644         if (isdigit(c)) {
 645                 if (c == '0') {
 646                         /* Hex or octal */
 647                         c = tok_nextc(tok);
 648                         if (c == '.')
 649                                 goto fraction;
 650 #ifndef WITHOUT_COMPLEX
 651                         if (c == 'j' || c == 'J')
 652                                 goto imaginary;
 653 #endif
 654                         if (c == 'x' || c == 'X') {
 655                                 /* Hex */
 656                                 do {
 657                                         c = tok_nextc(tok);
 658                                 } while (isxdigit(c));
 659                         }
 660                         else {
 661                                 /* XXX This is broken!  E.g.,
 662                                    09.9 should be accepted as float! */
 663                                 /* Octal; c is first char of it */
 664                                 /* There's no 'isoctdigit' macro, sigh */
 665                                 while ('0' <= c && c < '8') {
 666                                         c = tok_nextc(tok);
 667                                 }
 668                         }
 669                         if (c == 'l' || c == 'L')
 670                                 c = tok_nextc(tok);
 671                 }
 672                 else {
 673                         /* Decimal */
 674                         do {
 675                                 c = tok_nextc(tok);
 676                         } while (isdigit(c));
 677                         if (c == 'l' || c == 'L')
 678                                 c = tok_nextc(tok);
 679                         else {
 680                                 /* Accept floating point numbers.
 681                                    XXX This accepts incomplete things like
 682                                    XXX 12e or 1e+; worry run-time */
 683                                 if (c == '.') {
 684                 fraction:
 685                                         /* Fraction */
 686                                         do {
 687                                                 c = tok_nextc(tok);
 688                                         } while (isdigit(c));
 689                                 }
 690                                 if (c == 'e' || c == 'E') {
 691                                         /* Exponent part */
 692                                         c = tok_nextc(tok);
 693                                         if (c == '+' || c == '-')
 694                                                 c = tok_nextc(tok);
 695                                         while (isdigit(c)) {
 696                                                 c = tok_nextc(tok);
 697                                         }
 698                                 }
 699 #ifndef WITHOUT_COMPLEX
 700                                 if (c == 'j' || c == 'J')
 701                                         /* Imaginary part */
 702                 imaginary:
 703                                         c = tok_nextc(tok);
 704 #endif
 705                         }
 706                 }
 707                 tok_backup(tok, c);
 708                 *p_start = tok->start;
 709                 *p_end = tok->cur;
 710                 return NUMBER;
 711         }
 712
 713   letter_quote:
 714         /* String */
 715         if (c == '\'' || c == '"') {
 716                 int quote2 = tok->cur - tok->start + 1;
 717                 int quote = c;
 718                 int triple = 0;
 719                 int tripcount = 0;
 720                 for (;;) {
 721                         c = tok_nextc(tok);
 722                         if (c == '\n') {
 723                                 if (!triple) {
 724                                         tok->done = E_TOKEN;
 725                                         tok_backup(tok, c);
 726                                         return ERRORTOKEN;
 727                                 }
 728                                 tripcount = 0;
 729                         }
 730                         else if (c == EOF) {
 731                                 tok->done = E_TOKEN;
 732                                 tok->cur = tok->inp;
 733                                 return ERRORTOKEN;
 734                         }
 735                         else if (c == quote) {
 736                                 tripcount++;
 737                                 if (tok->cur - tok->start == quote2) {
 738                                         c = tok_nextc(tok);
 739                                         if (c == quote) {
 740                                                 triple = 1;
 741                                                 tripcount = 0;
 742                                                 continue;
 743                                         }
 744                                         tok_backup(tok, c);
 745                                 }
 746                                 if (!triple || tripcount == 3)
 747                                         break;
 748                         }
 749                         else if (c == '\\') {
 750                                 tripcount = 0;
 751                                 c = tok_nextc(tok);
 752                                 if (c == EOF) {
 753                                         tok->done = E_TOKEN;
 754                                         tok->cur = tok->inp;
 755                                         return ERRORTOKEN;
 756                                 }
 757                         }
 758                         else
 759                                 tripcount = 0;
 760                 }
 761                 *p_start = tok->start;
 762                 *p_end = tok->cur;
 763                 return STRING;
 764         }
 765
 766         /* Line continuation */
 767         if (c == '\\') {
 768                 c = tok_nextc(tok);
 769                 if (c != '\n') {
 770                         tok->done = E_TOKEN;
 771                         tok->cur = tok->inp;
 772                         return ERRORTOKEN;
 773                 }
 774                 goto again; /* Read next line */
 775         }
 776
 777         /* Check for two-character token */
 778         {
 779                 int c2 = tok_nextc(tok);
 780                 int token = PyToken_TwoChars(c, c2);
 781                 if (token != OP) {
 782                         *p_start = tok->start;
 783                         *p_end = tok->cur;
 784                         return token;
 785                 }
 786                 tok_backup(tok, c2);
 787         }
 788
 789         /* Keep track of parentheses nesting level */
 790         switch (c) {
 791         case '(':
 792         case '[':
 793         case '{':
 794                 tok->level++;
 795                 break;
 796         case ')':
 797         case ']':
 798         case '}':
 799                 tok->level--;
 800                 break;
 801         }
 802
 803         /* Punctuation character */
 804         *p_start = tok->start;
 805         *p_end = tok->cur;
 806         return PyToken_OneChar(c);
 807 }
 808
 809
 810 #ifdef Py_DEBUG
 811
 812 void
 813 tok_dump(type, start, end)
 814         int type;
 815         char *start, *end;
 816 {
 817         printf("%s", _PyParser_TokenNames[type]);
 818         if (type == NAME || type == NUMBER || type == STRING || type == OP)
 819                 printf("(%.*s)", (int)(end - start), start);
 820 }
 821
 822 #endif