soltools/cpp/_tokens.c

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <assert.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <ctype.h>
  25 #if (defined(_WIN32) || defined(__IBMC__))
  26 #include <io.h>
  27 #else
  28 #include <unistd.h>
  29 #endif
  30 #include "cpp.h"
  31
  32
  33 static char wbuf[4 * OBS];
  34 static char *wbp = wbuf;
  35 static int EBCDIC_ExternTokenDetected = 0;
  36 static int EBCDIC_StartTokenDetected = 0;
  37
  38 static unsigned char toLatin1[256] =
  39 {
  40     0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d,
  41     0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13,
  42     0x9d, 0x0a, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d,
  43     0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
  44     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 0x90, 0x91,
  45     0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b,
  46     0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1,
  47     0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
  48     0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf,
  49     0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 0x2d, 0x2f, 0xc2, 0xc4,
  50     0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f,
  51     0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
  52     0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
  53     0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
  54     0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, 0xb0, 0x6a, 0x6b, 0x6c,
  55     0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8,
  56     0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
  57     0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3,
  58     0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8,
  59     0xaf, 0x5d, 0xb4, 0xd7, 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45,
  60     0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
  61     0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52,
  62     0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54,
  63     0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2,
  64     0xd3, 0xd5, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  65     0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
  66 };
  67
  68 #define MASK    "\\x%x"
  69
  70 static int
  71     memcpy_EBCDIC( char * pwbuf, uchar const *p, int len )
  72 {
  73     int currpos = 0;
  74     int processedchars = 0;
  75
  76     if( len == 0 )
  77         return 0;
  78
  79     if( len == 1 )
  80     {
  81         *pwbuf = *p;
  82         return 1;
  83     }
  84
  85     /* copy spaces until " or ' */
  86     while( (p[ processedchars ] != '\"') && (p[ processedchars ] != '\'') )
  87         pwbuf[ currpos++ ] = p[ processedchars++ ];
  88
  89     /* copy first " or ' */
  90     pwbuf[ currpos++ ] = p[ processedchars++ ];
  91
  92     /* convert all characters until " or ' */
  93     while( processedchars < (len - 1) )
  94     {
  95         if( p[ processedchars ] == '\\' )
  96         {
  97             switch( p[ ++processedchars ] )
  98             {
  99                 case 'n':
 100                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\n'] );
 101                     processedchars++;
 102                     break;
 103
 104                 case 't':
 105                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\t'] );
 106                     processedchars++;
 107                     break;
 108
 109                 case 'v':
 110                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\v'] );
 111                     processedchars++;
 112                     break;
 113
 114                 case 'b':
 115                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\b'] );
 116                     processedchars++;
 117                     break;
 118
 119                 case 'r':
 120                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\r'] );
 121                     processedchars++;
 122                     break;
 123
 124                 case 'f':
 125                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\f'] );
 126                     processedchars++;
 127                     break;
 128
 129                 case 'a':
 130                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\a'] );
 131                     processedchars++;
 132                     break;
 133
 134                 case '\\':
 135                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\\'] );
 136                     processedchars++;
 137                     break;
 138
 139                 case '?':
 140                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\?'] );
 141                     processedchars++;
 142                     break;
 143
 144                 case '\'':
 145                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\''] );
 146                     processedchars++;
 147                     break;
 148
 149                 case '"':
 150                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\"'] );
 151                     processedchars++;
 152                     break;
 153
 154                 /* octal coded character? -> copy */
 155                 case '0':
 156                 case '1':
 157                 case '2':
 158                 case '3':
 159                 case '4':
 160                 case '5':
 161                 case '6':
 162                 case '7':
 163                     {
 164                     int startpos = currpos;
 165
 166                     pwbuf[ currpos++ ] = '\\';
 167
 168                     while( p[ processedchars ] >= '0' && p[ processedchars ] <= '7' && (currpos < startpos + 4) )
 169                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
 170                     break;
 171                     }
 172
 173                 /* hex coded character? -> copy */
 174                 case 'x':
 175                 case 'X':
 176                     {
 177                     int startpos = currpos;
 178
 179                     pwbuf[ currpos++ ] = '\\';
 180                     pwbuf[ currpos++ ] = 'x';
 181                     processedchars++;
 182
 183                     while( isxdigit( p[ processedchars ] ) && (currpos < startpos + 4) )
 184                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
 185                     break;
 186                     }
 187
 188             }
 189         }
 190         else
 191             currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1[p[ processedchars++ ]] );
 192
 193     }
 194
 195     /* copy last " or ' */
 196     pwbuf[ currpos++ ] = p[ processedchars ];
 197
 198     return currpos;
 199 }
 200
 201 void
 202     maketokenrow(int size, Tokenrow * trp)
 203 {
 204     trp->max = size;
 205     if (size > 0)
 206         trp->bp = (Token *) domalloc(size * sizeof(Token));
 207     else
 208         trp->bp = NULL;
 209     trp->tp = trp->bp;
 210     trp->lp = trp->bp;
 211 }
 212
 213 Token *
 214     growtokenrow(Tokenrow * trp)
 215 {
 216     size_t ncur = trp->tp - trp->bp;
 217     size_t nlast = trp->lp - trp->bp;
 218
 219     trp->max = 3 * trp->max / 2 + 1;
 220     trp->bp = (Token *) realloc(trp->bp, trp->max * sizeof(Token));
 221     assert(trp->bp); // realloc failure is OOM -> no point to handle
 222     trp->lp = &trp->bp[nlast];
 223     trp->tp = &trp->bp[ncur];
 224     return trp->lp;
 225 }
 226
 227 /*
 228  * Compare a row of tokens, ignoring the content of WS; return !=0 if different
 229  */
 230 int
 231     comparetokens(Tokenrow * tr1, Tokenrow * tr2)
 232 {
 233     Token *tp1, *tp2;
 234
 235     tp1 = tr1->tp;
 236     tp2 = tr2->tp;
 237     if (tr1->lp - tp1 != tr2->lp - tp2)
 238         return 1;
 239     for (; tp1 < tr1->lp; tp1++, tp2++)
 240     {
 241         if (tp1->type != tp2->type
 242             || (tp1->wslen == 0) != (tp2->wslen == 0)
 243             || tp1->len != tp2->len
 244             || strncmp((char *) tp1->t, (char *) tp2->t, tp1->len) != 0)
 245             return 1;
 246     }
 247     return 0;
 248 }
 249
 250 /*
 251  * replace ntok tokens starting at dtr->tp with the contents of str.
 252  * tp ends up pointing just beyond the replacement.
 253  * Canonical whitespace is assured on each side.
 254  */
 255 void
 256     insertrow(Tokenrow * dtr, int ntok, Tokenrow const * str)
 257 {
 258     int nrtok = (int)rowlen(str);
 259
 260     dtr->tp += ntok;
 261     adjustrow(dtr, nrtok - ntok);
 262     dtr->tp -= ntok;
 263     movetokenrow(dtr, str);
 264     dtr->tp += nrtok;
 265 }
 266
 267 /*
 268  * make sure there is WS before trp->tp, if tokens might merge in the output
 269  */
 270 void
 271     makespace(Tokenrow * trp, Token const * ntp)
 272 {
 273     uchar *tt;
 274     Token *tp = trp->tp;
 275
 276     if (tp >= trp->lp)
 277         return;
 278
 279     if (ntp->wslen)
 280     {
 281         tt = newstring(tp->t, tp->len, ntp->wslen);
 282         strncpy((char *)tt, (char *)ntp->t - ntp->wslen, ntp->wslen);
 283         tp->t = tt + ntp->wslen;
 284         tp->wslen = ntp->wslen;
 285     }
 286 }
 287
 288 /*
 289  * Copy an entire tokenrow into another, at tp.
 290  * It is assumed that there is enough space.
 291  *  Not strictly conforming.
 292  */
 293 void
 294     movetokenrow(Tokenrow * dtr, Tokenrow const * str)
 295 {
 296     size_t nby;
 297
 298     nby = (char *) str->lp - (char *) str->bp;
 299     if (nby)
 300         memmove(dtr->tp, str->bp, nby);
 301 }
 302
 303 /*
 304  * Move the tokens in a row, starting at tr->tp, rightward by nt tokens;
 305  * nt may be negative (left move).
 306  * The row may need to be grown.
 307  * Non-strictly conforming because of the (char *), but easily fixed
 308  */
 309 void
 310     adjustrow(Tokenrow * trp, int nt)
 311 {
 312     size_t nby, size;
 313
 314     if (nt == 0)
 315         return;
 316     size = (trp->lp - trp->bp) + nt;
 317     while (size > trp->max)
 318         growtokenrow(trp);
 319     nby = (char *) trp->lp - (char *) trp->tp;
 320     if (nby)
 321         memmove(trp->tp + nt, trp->tp, nby);
 322     trp->lp += nt;
 323 }
 324
 325 /*
 326  * Copy a row of tokens into the destination holder, allocating
 327  * the space for the contents.  Return the destination.
 328  */
 329 Tokenrow *
 330     copytokenrow(Tokenrow * dtr, Tokenrow const * str)
 331 {
 332     int len = (int)rowlen(str);
 333
 334     maketokenrow(len, dtr);
 335     movetokenrow(dtr, str);
 336     if (len != 0)
 337         dtr->lp += len;
 338     return dtr;
 339 }
 340
 341 /*
 342  * Produce a copy of a row of tokens.  Start at trp->tp.
 343  * The value strings are copied as well.  The first token
 344  * has WS available.
 345  */
 346 Tokenrow *
 347     normtokenrow(Tokenrow * trp)
 348 {
 349     Token *tp;
 350     Tokenrow *ntrp = new(Tokenrow);
 351     int len;
 352
 353     len = (int)(trp->lp - trp->tp);
 354     if (len <= 0)
 355         len = 1;
 356     maketokenrow(len, ntrp);
 357     for (tp = trp->tp; tp < trp->lp; tp++)
 358     {
 359         *ntrp->lp = *tp;
 360         if (tp->len)
 361         {
 362             ntrp->lp->t = newstring(tp->t, tp->len, 1);
 363             *ntrp->lp->t++ = ' ';
 364             if (tp->wslen)
 365                 ntrp->lp->wslen = 1;
 366         }
 367         ntrp->lp++;
 368     }
 369     if (ntrp->lp > ntrp->bp)
 370         ntrp->bp->wslen = 0;
 371     return ntrp;
 372 }
 373
 374 /*
 375  * Debugging
 376  */
 377 void
 378     peektokens(Tokenrow * trp, char *str)
 379 {
 380     Token *tp;
 381
 382     tp = trp->tp;
 383     flushout();
 384     if (str)
 385         fprintf(stderr, "%s ", str);
 386     if (tp < trp->bp || tp > trp->lp)
 387         fprintf(stderr, "(tp offset %ld) ", (long int) (tp - trp->bp));
 388     for (tp = trp->bp; tp < trp->lp && tp < trp->bp + 32; tp++)
 389     {
 390         if (tp->type != NL)
 391         {
 392             int c = tp->t[tp->len];
 393
 394             tp->t[tp->len] = 0;
 395             fprintf(stderr, "%s", tp->t);
 396             tp->t[tp->len] = (uchar) c;
 397         }
 398         fprintf(stderr, tp == trp->tp ? "{%x*} " : "{%x} ", tp->type);
 399     }
 400     fprintf(stderr, "\n");
 401     fflush(stderr);
 402 }
 403
 404 void
 405     puttokens(Tokenrow * trp)
 406 {
 407     Token *tp;
 408     int len;
 409     uchar *p;
 410
 411     if (Vflag)
 412         peektokens(trp, "");
 413     tp = trp->bp;
 414     for (; tp < trp->lp; tp++)
 415     {
 416         if (tp->type != NL)
 417         {
 418             len = (int)(tp->len + tp->wslen);
 419             p = tp->t - tp->wslen;
 420
 421             /* add parameter check to delete operator? */
 422             if( Dflag )
 423             {
 424                 if( (tp->type == NAME) && (strncmp( (char*)p, "delete", len ) == 0) )
 425                 {
 426                     Token* ntp = tp;
 427                     ntp++;
 428
 429                     if( ntp->type == NAME )
 430                     {
 431                         uchar* np = ntp->t - ntp->wslen;
 432                         int nlen = (int)(ntp->len + ntp->wslen);
 433
 434                         memcpy(wbp, "if(", 3 );
 435                         wbp += 4;
 436                         memcpy(wbp, np, nlen );
 437                         wbp += nlen;
 438                         memcpy(wbp, ")", 1 );
 439                         wbp++;
 440
 441                         memcpy(wbp, p, len);
 442                     }
 443                 }
 444             }
 445
 446             /* EBCDIC to ANSI conversion requested? */
 447             if( Aflag )
 448             {
 449                 /* keyword __ToLatin1__ found? -> do conversion! */
 450                 if( EBCDIC_StartTokenDetected )
 451                 {
 452                     /* previous token was 'extern'? -> don't convert current token! */
 453                     if( EBCDIC_ExternTokenDetected )
 454                     {
 455                         EBCDIC_ExternTokenDetected = 0;
 456                         memcpy(wbp, p, len);
 457                     }
 458                     else
 459                     {
 460                         /* current token is keyword 'extern'? -> don't convert following token! */
 461                         if( (tp->wslen == 0) && (strncmp( (char*)p, "extern", len ) == 0) )
 462                         {
 463                             EBCDIC_ExternTokenDetected = 1;
 464                             memcpy(wbp, p, len);
 465                         }
 466                         else
 467                         {
 468                             /* token is string or char? -> process EBCDIC to ANSI conversion */
 469                             if ((tp->type == STRING) || (tp->type == CCON))
 470                                 len = memcpy_EBCDIC(wbp,  p, len);
 471                             else
 472                                 memcpy(wbp, p, len);
 473                         }
 474                     }
 475                 }
 476                 else
 477                     /* keyword __ToLatin1__ found? -> don't copy keyword and start conversion */
 478                     if( (tp->type == NAME) && (strncmp( (char*)p, "__ToLatin1__", len) == 0) )
 479                     {
 480                         EBCDIC_StartTokenDetected = 1;
 481                         len = 0;
 482                     }
 483                     else
 484                         memcpy(wbp, p, len);
 485             }
 486             else
 487                 memcpy(wbp, p, len);
 488
 489             wbp += len;
 490         }
 491         else
 492             *wbp++ = '\n';
 493
 494         if (wbp >= &wbuf[OBS])
 495         {
 496             if ( write(1, wbuf, OBS) != -1 ) {
 497             if (wbp > &wbuf[OBS])
 498                 memmove(wbuf, wbuf + OBS, wbp - &wbuf[OBS]);
 499             wbp -= OBS;
 500         }
 501         else exit(1);
 502         }
 503     }
 504     trp->tp = tp;
 505     if (cursource->fd == 0)
 506         flushout();
 507 }
 508
 509 void
 510     flushout(void)
 511 {
 512     if (wbp > wbuf)
 513     {
 514         if ( write(1, wbuf, (int)(wbp - wbuf)) != -1)
 515             wbp = wbuf;
 516         else
 517             exit(1);
 518     }
 519 }
 520
 521 /*
 522  * turn a row into just a newline
 523  */
 524 void
 525     setempty(Tokenrow * trp)
 526 {
 527     trp->tp = trp->bp;
 528     trp->lp = trp->bp + 1;
 529     *trp->bp = nltoken;
 530 }
 531
 532 /*
 533  * generate a number
 534  */
 535 char *
 536     outnum(char *p, int n)
 537 {
 538     if (n >= 10)
 539         p = outnum(p, n / 10);
 540     *p++ = (char) (n % 10 + '0');
 541     return p;
 542 }
 543
 544 /*
 545  * allocate and initialize a new string from s, of length l, at offset o
 546  * Null terminated.
 547  */
 548 uchar *
 549     newstring(uchar const * s, size_t l, size_t o)
 550 {
 551     uchar *ns = (uchar *) domalloc(l + o + 1);
 552
 553     ns[l + o] = '\0';
 554     return (uchar *) strncpy((char *) ns + o, (char *) s, l) - o;
 555 }
 556
 557 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */