soltools/cpp/_tokens.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <ctype.h>
   5 #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__))
   6 #include <io.h>
   7 #else
   8 #include <unistd.h>
   9 #endif
  10 #include "cpp.h"
  11
  12
  13 static char wbuf[4 * OBS];
  14 static char *wbp = wbuf;
  15 static int EBCDIC_ExternTokenDetected = 0;
  16 static int EBCDIC_StartTokenDetected = 0;
  17
  18 unsigned char toLatin1[256] =
  19 {
  20     0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d,
  21     0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13,
  22     0x9d, 0x0a, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d,
  23     0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
  24     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 0x90, 0x91,
  25     0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b,
  26     0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1,
  27     0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
  28     0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf,
  29     0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 0x2d, 0x2f, 0xc2, 0xc4,
  30     0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f,
  31     0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
  32     0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
  33     0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
  34     0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, 0xb0, 0x6a, 0x6b, 0x6c,
  35     0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8,
  36     0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
  37     0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3,
  38     0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8,
  39     0xaf, 0x5d, 0xb4, 0xd7, 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45,
  40     0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
  41     0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52,
  42     0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54,
  43     0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2,
  44     0xd3, 0xd5, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  45     0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
  46 };
  47
  48 #define MASK    "\\x%x"
  49
  50 int
  51     memcpy_EBCDIC( char * pwbuf, uchar *p, int len )
  52 {
  53     int currpos = 0;
  54     int processedchars = 0;
  55
  56     if( len == 0 )
  57         return 0;
  58
  59     if( len == 1 )
  60     {
  61         *pwbuf = *p;
  62         return 1;
  63     }
  64
  65     /* copy spaces until " or ' */
  66     while( (p[ processedchars ] != '\"') && (p[ processedchars ] != '\'') )
  67         pwbuf[ currpos++ ] = p[ processedchars++ ];
  68
  69     /* copy first " or ' */
  70     pwbuf[ currpos++ ] = p[ processedchars++ ];
  71
  72     /* convert all characters until " or ' */
  73     while( processedchars < (len - 1) )
  74     {
  75         if( p[ processedchars ] == '\\' )
  76         {
  77             switch( p[ ++processedchars ] )
  78             {
  79                 case 'n':
  80                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\n'] );
  81                     processedchars++;
  82                     break;
  83
  84                 case 't':
  85                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\t'] );
  86                     processedchars++;
  87                     break;
  88
  89                 case 'v':
  90                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\v'] );
  91                     processedchars++;
  92                     break;
  93
  94                 case 'b':
  95                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\b'] );
  96                     processedchars++;
  97                     break;
  98
  99                 case 'r':
 100                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\r'] );
 101                     processedchars++;
 102                     break;
 103
 104                 case 'f':
 105                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\f'] );
 106                     processedchars++;
 107                     break;
 108
 109                 case 'a':
 110                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\a'] );
 111                     processedchars++;
 112                     break;
 113
 114                 case '\\':
 115                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\\'] );
 116                     processedchars++;
 117                     break;
 118
 119                 case '?':
 120                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\?'] );
 121                     processedchars++;
 122                     break;
 123
 124                 case '\'':
 125                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\''] );
 126                     processedchars++;
 127                     break;
 128
 129                 case '"':
 130                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\"'] );
 131                     processedchars++;
 132                     break;
 133
 134                 /* octal coded character? -> copy */
 135                 case '0':
 136                 case '1':
 137                 case '2':
 138                 case '3':
 139                 case '4':
 140                 case '5':
 141                 case '6':
 142                 case '7':
 143                     {
 144                     int startpos = currpos;
 145
 146                     pwbuf[ currpos++ ] = '\\';
 147
 148                     while( p[ processedchars ] >= '0' && p[ processedchars ] <= '7' && (currpos < startpos + 4) )
 149                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
 150                     break;
 151                     }
 152
 153                 /* hex coded character? -> copy */
 154                 case 'x':
 155                 case 'X':
 156                     {
 157                     int startpos = currpos;
 158
 159                     pwbuf[ currpos++ ] = '\\';
 160                     pwbuf[ currpos++ ] = 'x';
 161                     processedchars++;
 162
 163                     while( isxdigit( p[ processedchars ] ) && (currpos < startpos + 4) )
 164                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
 165                     break;
 166                     }
 167
 168             }
 169         }
 170         else
 171             currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1[p[ processedchars++ ]] );
 172
 173     }
 174
 175     /* copy last " or ' */
 176     pwbuf[ currpos++ ] = p[ processedchars ];
 177
 178     return currpos;
 179 }
 180
 181 void
 182     maketokenrow(int size, Tokenrow * trp)
 183 {
 184     trp->max = size;
 185     if (size > 0)
 186         trp->bp = (Token *) domalloc(size * sizeof(Token));
 187     else
 188         trp->bp = NULL;
 189     trp->tp = trp->bp;
 190     trp->lp = trp->bp;
 191 }
 192
 193 Token *
 194     growtokenrow(Tokenrow * trp)
 195 {
 196     int ncur = trp->tp - trp->bp;
 197     int nlast = trp->lp - trp->bp;
 198
 199     trp->max = 3 * trp->max / 2 + 1;
 200     trp->bp = (Token *) realloc(trp->bp, trp->max * sizeof(Token));
 201     trp->lp = &trp->bp[nlast];
 202     trp->tp = &trp->bp[ncur];
 203     return trp->lp;
 204 }
 205
 206 /*
 207  * Compare a row of tokens, ignoring the content of WS; return !=0 if different
 208  */
 209 int
 210     comparetokens(Tokenrow * tr1, Tokenrow * tr2)
 211 {
 212     Token *tp1, *tp2;
 213
 214     tp1 = tr1->tp;
 215     tp2 = tr2->tp;
 216     if (tr1->lp - tp1 != tr2->lp - tp2)
 217         return 1;
 218     for (; tp1 < tr1->lp; tp1++, tp2++)
 219     {
 220         if (tp1->type != tp2->type
 221             || (tp1->wslen == 0) != (tp2->wslen == 0)
 222             || tp1->len != tp2->len
 223             || strncmp((char *) tp1->t, (char *) tp2->t, tp1->len) != 0)
 224             return 1;
 225     }
 226     return 0;
 227 }
 228
 229 /*
 230  * replace ntok tokens starting at dtr->tp with the contents of str.
 231  * tp ends up pointing just beyond the replacement.
 232  * Canonical whitespace is assured on each side.
 233  */
 234 void
 235     insertrow(Tokenrow * dtr, int ntok, Tokenrow * str)
 236 {
 237     int nrtok = rowlen(str);
 238
 239     dtr->tp += ntok;
 240     adjustrow(dtr, nrtok - ntok);
 241     dtr->tp -= ntok;
 242     movetokenrow(dtr, str);
 243     dtr->tp += nrtok;
 244 }
 245
 246 /*
 247  * make sure there is WS before trp->tp, if tokens might merge in the output
 248  */
 249 void
 250     makespace(Tokenrow * trp, Token * ntp)
 251 {
 252     uchar *tt;
 253     Token *tp = trp->tp;
 254
 255     if (tp >= trp->lp)
 256         return;
 257
 258     if (ntp->wslen)
 259     {
 260         tt = newstring(tp->t, tp->len, ntp->wslen);
 261         strncpy((char *)tt, (char *)ntp->t - ntp->wslen, ntp->wslen);
 262         tp->t = tt + ntp->wslen;
 263         tp->wslen = ntp->wslen;
 264         tp->flag |= XPWS;
 265     }
 266 }
 267
 268 /*
 269  * Copy an entire tokenrow into another, at tp.
 270  * It is assumed that there is enough space.
 271  *  Not strictly conforming.
 272  */
 273 void
 274     movetokenrow(Tokenrow * dtr, Tokenrow * str)
 275 {
 276     int nby;
 277
 278     /* nby = sizeof(Token) * (str->lp - str->bp); */
 279     nby = (char *) str->lp - (char *) str->bp;
 280     memmove(dtr->tp, str->bp, nby);
 281 }
 282
 283 /*
 284  * Move the tokens in a row, starting at tr->tp, rightward by nt tokens;
 285  * nt may be negative (left move).
 286  * The row may need to be grown.
 287  * Non-strictly conforming because of the (char *), but easily fixed
 288  */
 289 void
 290     adjustrow(Tokenrow * trp, int nt)
 291 {
 292     int nby, size;
 293
 294     if (nt == 0)
 295         return;
 296     size = (trp->lp - trp->bp) + nt;
 297     while (size > trp->max)
 298         growtokenrow(trp);
 299     /* nby = sizeof(Token) * (trp->lp - trp->tp); */
 300     nby = (char *) trp->lp - (char *) trp->tp;
 301     if (nby)
 302         memmove(trp->tp + nt, trp->tp, nby);
 303     trp->lp += nt;
 304 }
 305
 306 /*
 307  * Copy a row of tokens into the destination holder, allocating
 308  * the space for the contents.  Return the destination.
 309  */
 310 Tokenrow *
 311     copytokenrow(Tokenrow * dtr, Tokenrow * str)
 312 {
 313     int len = rowlen(str);
 314
 315     maketokenrow(len, dtr);
 316     movetokenrow(dtr, str);
 317     dtr->lp += len;
 318     return dtr;
 319 }
 320
 321 /*
 322  * Produce a copy of a row of tokens.  Start at trp->tp.
 323  * The value strings are copied as well.  The first token
 324  * has WS available.
 325  */
 326 Tokenrow *
 327     normtokenrow(Tokenrow * trp)
 328 {
 329     Token *tp;
 330     Tokenrow *ntrp = new(Tokenrow);
 331     int len;
 332
 333     len = trp->lp - trp->tp;
 334     if (len <= 0)
 335         len = 1;
 336     maketokenrow(len, ntrp);
 337     for (tp = trp->tp; tp < trp->lp; tp++)
 338     {
 339         *ntrp->lp = *tp;
 340         if (tp->len)
 341         {
 342             ntrp->lp->t = newstring(tp->t, tp->len, 1);
 343             *ntrp->lp->t++ = ' ';
 344             if (tp->wslen)
 345                 ntrp->lp->wslen = 1;
 346         }
 347         ntrp->lp++;
 348     }
 349     if (ntrp->lp > ntrp->bp)
 350         ntrp->bp->wslen = 0;
 351     return ntrp;
 352 }
 353
 354 /*
 355  * Debugging
 356  */
 357 void
 358     peektokens(Tokenrow * trp, char *str)
 359 {
 360     Token *tp;
 361
 362     tp = trp->tp;
 363     flushout();
 364     if (str)
 365         fprintf(stderr, "%s ", str);
 366     if (tp < trp->bp || tp > trp->lp)
 367         fprintf(stderr, "(tp offset %ld) ", (long int) (tp - trp->bp));
 368     for (tp = trp->bp; tp < trp->lp && tp < trp->bp + 32; tp++)
 369     {
 370         if (tp->type != NL)
 371         {
 372             int c = tp->t[tp->len];
 373
 374             tp->t[tp->len] = 0;
 375             fprintf(stderr, "%s", tp->t);
 376             tp->t[tp->len] = (uchar) c;
 377         }
 378         fprintf(stderr, tp == trp->tp ? "{%x*} " : "{%x} ", tp->type);
 379     }
 380     fprintf(stderr, "\n");
 381     fflush(stderr);
 382 }
 383
 384 void
 385     puttokens(Tokenrow * trp)
 386 {
 387     Token *tp;
 388     int len;
 389     uchar *p;
 390
 391     if (Vflag)
 392         peektokens(trp, "");
 393     tp = trp->bp;
 394     for (; tp < trp->lp; tp++)
 395     {
 396         if (tp->type != NL)
 397         {
 398             len = tp->len + tp->wslen;
 399             p = tp->t - tp->wslen;
 400
 401             /* add parameter check to delete operator? */
 402             if( Dflag )
 403             {
 404                 if( (tp->type == NAME) && (strncmp( (char*)p, "delete", len ) == 0) )
 405                 {
 406                     Token* ntp = tp;
 407                     ntp++;
 408
 409                     if( ntp->type == NAME )
 410                     {
 411                         uchar* np = ntp->t - ntp->wslen;
 412                         int nlen = ntp->len + ntp->wslen;
 413
 414                         memcpy(wbp, "if(", 3 );
 415                          wbp += 4;
 416                         memcpy(wbp, np, nlen );
 417                          wbp += nlen;
 418                         memcpy(wbp, ")", 1 );
 419                          wbp++;
 420
 421                         memcpy(wbp, p, len);
 422                     }
 423                 }
 424             }
 425
 426             /* EBCDIC to ANSI conversion requested? */
 427             if( Aflag )
 428             {
 429                 /* keyword __ToLatin1__ found? -> do conversion! */
 430                 if( EBCDIC_StartTokenDetected )
 431                 {
 432                     /* previous token was 'extern'? -> don't convert current token! */
 433                     if( EBCDIC_ExternTokenDetected )
 434                     {
 435                         EBCDIC_ExternTokenDetected = 0;
 436                         memcpy(wbp, p, len);
 437                     }
 438                     else
 439                     {
 440                         /* current token is keyword 'extern'? -> don't convert following token! */
 441                         if( (tp->wslen == 0) && (strncmp( (char*)p, "extern", len ) == 0) )
 442                         {
 443                             EBCDIC_ExternTokenDetected = 1;
 444                             memcpy(wbp, p, len);
 445                         }
 446                         else
 447                         {
 448                             /* token is string or char? -> process EBCDIC to ANSI conversion */
 449                             if ((tp->type == STRING) || (tp->type == CCON))
 450                                 len = memcpy_EBCDIC(wbp,  p, len);
 451                             else
 452                                 memcpy(wbp, p, len);
 453                         }
 454                     }
 455                 }
 456                 else
 457                     /* keyword __ToLatin1__ found? -> don't copy keyword and start conversion */
 458                     if( (tp->type == NAME) && (strncmp( (char*)p, "__ToLatin1__", len) == 0) )
 459                     {
 460                         EBCDIC_StartTokenDetected = 1;
 461                         len = 0;
 462                     }
 463                     else
 464                         memcpy(wbp, p, len);
 465             }
 466             else
 467                 memcpy(wbp, p, len);
 468
 469             wbp += len;
 470         }
 471         else
 472             *wbp++ = '\n';
 473
 474         if (wbp >= &wbuf[OBS])
 475         {
 476             if ( write(1, wbuf, OBS) != -1 ) {
 477             if (wbp > &wbuf[OBS])
 478                 memcpy(wbuf, wbuf + OBS, wbp - &wbuf[OBS]);
 479             wbp -= OBS;
 480         }
 481         else exit(1);
 482         }
 483     }
 484     trp->tp = tp;
 485     if (cursource->fd == 0)
 486         flushout();
 487 }
 488
 489 void
 490     flushout(void)
 491 {
 492     if (wbp > wbuf)
 493     {
 494         if ( write(1, wbuf, wbp - wbuf) != -1)
 495             wbp = wbuf;
 496     else
 497         exit(1);
 498     }
 499 }
 500
 501 /*
 502  * turn a row into just a newline
 503  */
 504 void
 505     setempty(Tokenrow * trp)
 506 {
 507     trp->tp = trp->bp;
 508     trp->lp = trp->bp + 1;
 509     *trp->bp = nltoken;
 510 }
 511
 512 /*
 513  * generate a number
 514  */
 515 char *
 516     outnum(char *p, int n)
 517 {
 518     if (n >= 10)
 519         p = outnum(p, n / 10);
 520     *p++ = (char) (n % 10 + '0');
 521     return p;
 522 }
 523
 524 /*
 525  * allocate and initialize a new string from s, of length l, at offset o
 526  * Null terminated.
 527  */
 528 uchar *
 529     newstring(uchar * s, int l, int o)
 530 {
 531     uchar *ns = (uchar *) domalloc(l + o + 1);
 532
 533     ns[l + o] = '\0';
 534     return (uchar *) strncpy((char *) ns + o, (char *) s, l) - o;
 535 }