soltools/cpp/_tokens.c

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <ctype.h>
  24 #if (defined(_WIN32) || defined(__IBMC__))
  25 #include <io.h>
  26 #else
  27 #include <unistd.h>
  28 #endif
  29 #include "cpp.h"
  30
  31
/*
 * Output staging buffer.  Token text is appended at wbp; puttokens()
 * writes the first OBS bytes to file descriptor 1 once wbp reaches
 * &wbuf[OBS], and flushout() writes whatever is pending.  The buffer is
 * four times OBS bytes long.
 */
static char wbuf[4 * OBS];
/* Next free byte in wbuf. */
static char *wbp = wbuf;
/*
 * State for the Aflag handling in puttokens():
 * EBCDIC_ExternTokenDetected is set when the token just written was
 * "extern", so that the following token is copied unconverted;
 * EBCDIC_StartTokenDetected is set once the __ToLatin1__ keyword has
 * been seen and is never cleared in this file.
 */
static int EBCDIC_ExternTokenDetected = 0;
static int EBCDIC_StartTokenDetected = 0;
  36
/*
 * Byte translation table, indexed by the source character code.
 * memcpy_EBCDIC() looks every character of a string or character
 * literal up here and emits the result as a hex escape.
 * Going by the names and comments in this file the source encoding is
 * EBCDIC and the target is ISO 8859-1 (Latin-1); e.g. entry 0x40 is
 * 0x20 (space) and entry 0xc1 is 0x41 ('A').
 * The short row below closes the first 128 entries.
 */
unsigned char toLatin1[256] =
{
    0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d,
    0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13,
    0x9d, 0x0a, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d,
    0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 0x90, 0x91,
    0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b,
    0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1,
    0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf,
    0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 0x2d, 0x2f, 0xc2, 0xc4,
    0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f,
    0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
    0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
    0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, 0xb0, 0x6a, 0x6b, 0x6c,
    0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8,
    0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3,
    0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8,
    0xaf, 0x5d, 0xb4, 0xd7, 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45,
    0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52,
    0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54,
    0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2,
    0xd3, 0xd5, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
};
  66
  67 #define MASK    "\\x%x"
  68
  69 int
  70     memcpy_EBCDIC( char * pwbuf, uchar *p, int len )
  71 {
  72     int currpos = 0;
  73     int processedchars = 0;
  74
  75     if( len == 0 )
  76         return 0;
  77
  78     if( len == 1 )
  79     {
  80         *pwbuf = *p;
  81         return 1;
  82     }
  83
  84     /* copy spaces until " or ' */
  85     while( (p[ processedchars ] != '\"') && (p[ processedchars ] != '\'') )
  86         pwbuf[ currpos++ ] = p[ processedchars++ ];
  87
  88     /* copy first " or ' */
  89     pwbuf[ currpos++ ] = p[ processedchars++ ];
  90
  91     /* convert all characters until " or ' */
  92     while( processedchars < (len - 1) )
  93     {
  94         if( p[ processedchars ] == '\\' )
  95         {
  96             switch( p[ ++processedchars ] )
  97             {
  98                 case 'n':
  99                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\n'] );
 100                     processedchars++;
 101                     break;
 102
 103                 case 't':
 104                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\t'] );
 105                     processedchars++;
 106                     break;
 107
 108                 case 'v':
 109                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\v'] );
 110                     processedchars++;
 111                     break;
 112
 113                 case 'b':
 114                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\b'] );
 115                     processedchars++;
 116                     break;
 117
 118                 case 'r':
 119                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\r'] );
 120                     processedchars++;
 121                     break;
 122
 123                 case 'f':
 124                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\f'] );
 125                     processedchars++;
 126                     break;
 127
 128                 case 'a':
 129                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\a'] );
 130                     processedchars++;
 131                     break;
 132
 133                 case '\\':
 134                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\\'] );
 135                     processedchars++;
 136                     break;
 137
 138                 case '?':
 139                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\?'] );
 140                     processedchars++;
 141                     break;
 142
 143                 case '\'':
 144                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\''] );
 145                     processedchars++;
 146                     break;
 147
 148                 case '"':
 149                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\"'] );
 150                     processedchars++;
 151                     break;
 152
 153                 /* octal coded character? -> copy */
 154                 case '0':
 155                 case '1':
 156                 case '2':
 157                 case '3':
 158                 case '4':
 159                 case '5':
 160                 case '6':
 161                 case '7':
 162                     {
 163                     int startpos = currpos;
 164
 165                     pwbuf[ currpos++ ] = '\\';
 166
 167                     while( p[ processedchars ] >= '0' && p[ processedchars ] <= '7' && (currpos < startpos + 4) )
 168                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
 169                     break;
 170                     }
 171
 172                 /* hex coded character? -> copy */
 173                 case 'x':
 174                 case 'X':
 175                     {
 176                     int startpos = currpos;
 177
 178                     pwbuf[ currpos++ ] = '\\';
 179                     pwbuf[ currpos++ ] = 'x';
 180                     processedchars++;
 181
 182                     while( isxdigit( p[ processedchars ] ) && (currpos < startpos + 4) )
 183                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
 184                     break;
 185                     }
 186
 187             }
 188         }
 189         else
 190             currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1[p[ processedchars++ ]] );
 191
 192     }
 193
 194     /* copy last " or ' */
 195     pwbuf[ currpos++ ] = p[ processedchars ];
 196
 197     return currpos;
 198 }
 199
 200 void
 201     maketokenrow(int size, Tokenrow * trp)
 202 {
 203     trp->max = size;
 204     if (size > 0)
 205         trp->bp = (Token *) domalloc(size * sizeof(Token));
 206     else
 207         trp->bp = NULL;
 208     trp->tp = trp->bp;
 209     trp->lp = trp->bp;
 210 }
 211
 212 Token *
 213     growtokenrow(Tokenrow * trp)
 214 {
 215     size_t ncur = trp->tp - trp->bp;
 216     size_t nlast = trp->lp - trp->bp;
 217
 218     trp->max = 3 * trp->max / 2 + 1;
 219     trp->bp = (Token *) realloc(trp->bp, trp->max * sizeof(Token));
 220     trp->lp = &trp->bp[nlast];
 221     trp->tp = &trp->bp[ncur];
 222     return trp->lp;
 223 }
 224
 225 /*
 226  * Compare a row of tokens, ignoring the content of WS; return !=0 if different
 227  */
 228 int
 229     comparetokens(Tokenrow * tr1, Tokenrow * tr2)
 230 {
 231     Token *tp1, *tp2;
 232
 233     tp1 = tr1->tp;
 234     tp2 = tr2->tp;
 235     if (tr1->lp - tp1 != tr2->lp - tp2)
 236         return 1;
 237     for (; tp1 < tr1->lp; tp1++, tp2++)
 238     {
 239         if (tp1->type != tp2->type
 240             || (tp1->wslen == 0) != (tp2->wslen == 0)
 241             || tp1->len != tp2->len
 242             || strncmp((char *) tp1->t, (char *) tp2->t, tp1->len) != 0)
 243             return 1;
 244     }
 245     return 0;
 246 }
 247
 248 /*
 249  * replace ntok tokens starting at dtr->tp with the contents of str.
 250  * tp ends up pointing just beyond the replacement.
 251  * Canonical whitespace is assured on each side.
 252  */
 253 void
 254     insertrow(Tokenrow * dtr, int ntok, Tokenrow * str)
 255 {
 256     int nrtok = (int)rowlen(str);
 257
 258     dtr->tp += ntok;
 259     adjustrow(dtr, nrtok - ntok);
 260     dtr->tp -= ntok;
 261     movetokenrow(dtr, str);
 262     dtr->tp += nrtok;
 263 }
 264
 265 /*
 266  * make sure there is WS before trp->tp, if tokens might merge in the output
 267  */
 268 void
 269     makespace(Tokenrow * trp, Token * ntp)
 270 {
 271     uchar *tt;
 272     Token *tp = trp->tp;
 273
 274     if (tp >= trp->lp)
 275         return;
 276
 277     if (ntp->wslen)
 278     {
 279         tt = newstring(tp->t, tp->len, ntp->wslen);
 280         strncpy((char *)tt, (char *)ntp->t - ntp->wslen, ntp->wslen);
 281         tp->t = tt + ntp->wslen;
 282         tp->wslen = ntp->wslen;
 283         tp->flag |= XPWS;
 284     }
 285 }
 286
 287 /*
 288  * Copy an entire tokenrow into another, at tp.
 289  * It is assumed that there is enough space.
 290  *  Not strictly conforming.
 291  */
 292 void
 293     movetokenrow(Tokenrow * dtr, Tokenrow * str)
 294 {
 295     size_t nby;
 296
 297     nby = (char *) str->lp - (char *) str->bp;
 298     memmove(dtr->tp, str->bp, nby);
 299 }
 300
 301 /*
 302  * Move the tokens in a row, starting at tr->tp, rightward by nt tokens;
 303  * nt may be negative (left move).
 304  * The row may need to be grown.
 305  * Non-strictly conforming because of the (char *), but easily fixed
 306  */
 307 void
 308     adjustrow(Tokenrow * trp, int nt)
 309 {
 310     size_t nby, size;
 311
 312     if (nt == 0)
 313         return;
 314     size = (trp->lp - trp->bp) + nt;
 315     while (size > trp->max)
 316         growtokenrow(trp);
 317     nby = (char *) trp->lp - (char *) trp->tp;
 318     if (nby)
 319         memmove(trp->tp + nt, trp->tp, nby);
 320     trp->lp += nt;
 321 }
 322
 323 /*
 324  * Copy a row of tokens into the destination holder, allocating
 325  * the space for the contents.  Return the destination.
 326  */
 327 Tokenrow *
 328     copytokenrow(Tokenrow * dtr, Tokenrow * str)
 329 {
 330     int len = (int)rowlen(str);
 331
 332     maketokenrow(len, dtr);
 333     movetokenrow(dtr, str);
 334     dtr->lp += len;
 335     return dtr;
 336 }
 337
 338 /*
 339  * Produce a copy of a row of tokens.  Start at trp->tp.
 340  * The value strings are copied as well.  The first token
 341  * has WS available.
 342  */
 343 Tokenrow *
 344     normtokenrow(Tokenrow * trp)
 345 {
 346     Token *tp;
 347     Tokenrow *ntrp = new(Tokenrow);
 348     int len;
 349
 350     len = (int)(trp->lp - trp->tp);
 351     if (len <= 0)
 352         len = 1;
 353     maketokenrow(len, ntrp);
 354     for (tp = trp->tp; tp < trp->lp; tp++)
 355     {
 356         *ntrp->lp = *tp;
 357         if (tp->len)
 358         {
 359             ntrp->lp->t = newstring(tp->t, tp->len, 1);
 360             *ntrp->lp->t++ = ' ';
 361             if (tp->wslen)
 362                 ntrp->lp->wslen = 1;
 363         }
 364         ntrp->lp++;
 365     }
 366     if (ntrp->lp > ntrp->bp)
 367         ntrp->bp->wslen = 0;
 368     return ntrp;
 369 }
 370
 371 /*
 372  * Debugging
 373  */
 374 void
 375     peektokens(Tokenrow * trp, char *str)
 376 {
 377     Token *tp;
 378
 379     tp = trp->tp;
 380     flushout();
 381     if (str)
 382         fprintf(stderr, "%s ", str);
 383     if (tp < trp->bp || tp > trp->lp)
 384         fprintf(stderr, "(tp offset %ld) ", (long int) (tp - trp->bp));
 385     for (tp = trp->bp; tp < trp->lp && tp < trp->bp + 32; tp++)
 386     {
 387         if (tp->type != NL)
 388         {
 389             int c = tp->t[tp->len];
 390
 391             tp->t[tp->len] = 0;
 392             fprintf(stderr, "%s", tp->t);
 393             tp->t[tp->len] = (uchar) c;
 394         }
 395         fprintf(stderr, tp == trp->tp ? "{%x*} " : "{%x} ", tp->type);
 396     }
 397     fprintf(stderr, "\n");
 398     fflush(stderr);
 399 }
 400
 401 void
 402     puttokens(Tokenrow * trp)
 403 {
 404     Token *tp;
 405     int len;
 406     uchar *p;
 407
 408     if (Vflag)
 409         peektokens(trp, "");
 410     tp = trp->bp;
 411     for (; tp < trp->lp; tp++)
 412     {
 413         if (tp->type != NL)
 414         {
 415             len = (int)(tp->len + tp->wslen);
 416             p = tp->t - tp->wslen;
 417
 418             /* add parameter check to delete operator? */
 419             if( Dflag )
 420             {
 421                 if( (tp->type == NAME) && (strncmp( (char*)p, "delete", len ) == 0) )
 422                 {
 423                     Token* ntp = tp;
 424                     ntp++;
 425
 426                     if( ntp->type == NAME )
 427                     {
 428                         uchar* np = ntp->t - ntp->wslen;
 429                         int nlen = (int)(ntp->len + ntp->wslen);
 430
 431                         memcpy(wbp, "if(", 3 );
 432                          wbp += 4;
 433                         memcpy(wbp, np, nlen );
 434                          wbp += nlen;
 435                         memcpy(wbp, ")", 1 );
 436                          wbp++;
 437
 438                         memcpy(wbp, p, len);
 439                     }
 440                 }
 441             }
 442
 443             /* EBCDIC to ANSI conversion requested? */
 444             if( Aflag )
 445             {
 446                 /* keyword __ToLatin1__ found? -> do conversion! */
 447                 if( EBCDIC_StartTokenDetected )
 448                 {
 449                     /* previous token was 'extern'? -> don't convert current token! */
 450                     if( EBCDIC_ExternTokenDetected )
 451                     {
 452                         EBCDIC_ExternTokenDetected = 0;
 453                         memcpy(wbp, p, len);
 454                     }
 455                     else
 456                     {
 457                         /* current token is keyword 'extern'? -> don't convert following token! */
 458                         if( (tp->wslen == 0) && (strncmp( (char*)p, "extern", len ) == 0) )
 459                         {
 460                             EBCDIC_ExternTokenDetected = 1;
 461                             memcpy(wbp, p, len);
 462                         }
 463                         else
 464                         {
 465                             /* token is string or char? -> process EBCDIC to ANSI conversion */
 466                             if ((tp->type == STRING) || (tp->type == CCON))
 467                                 len = memcpy_EBCDIC(wbp,  p, len);
 468                             else
 469                                 memcpy(wbp, p, len);
 470                         }
 471                     }
 472                 }
 473                 else
 474                     /* keyword __ToLatin1__ found? -> don't copy keyword and start conversion */
 475                     if( (tp->type == NAME) && (strncmp( (char*)p, "__ToLatin1__", len) == 0) )
 476                     {
 477                         EBCDIC_StartTokenDetected = 1;
 478                         len = 0;
 479                     }
 480                     else
 481                         memcpy(wbp, p, len);
 482             }
 483             else
 484                 memcpy(wbp, p, len);
 485
 486             wbp += len;
 487         }
 488         else
 489             *wbp++ = '\n';
 490
 491         if (wbp >= &wbuf[OBS])
 492         {
 493             if ( write(1, wbuf, OBS) != -1 ) {
 494             if (wbp > &wbuf[OBS])
 495                 memmove(wbuf, wbuf + OBS, wbp - &wbuf[OBS]);
 496             wbp -= OBS;
 497         }
 498         else exit(1);
 499         }
 500     }
 501     trp->tp = tp;
 502     if (cursource->fd == 0)
 503         flushout();
 504 }
 505
 506 void
 507     flushout(void)
 508 {
 509     if (wbp > wbuf)
 510     {
 511         if ( write(1, wbuf, (int)(wbp - wbuf)) != -1)
 512             wbp = wbuf;
 513     else
 514         exit(1);
 515     }
 516 }
 517
 518 /*
 519  * turn a row into just a newline
 520  */
 521 void
 522     setempty(Tokenrow * trp)
 523 {
 524     trp->tp = trp->bp;
 525     trp->lp = trp->bp + 1;
 526     *trp->bp = nltoken;
 527 }
 528
 529 /*
 530  * generate a number
 531  */
 532 char *
 533     outnum(char *p, int n)
 534 {
 535     if (n >= 10)
 536         p = outnum(p, n / 10);
 537     *p++ = (char) (n % 10 + '0');
 538     return p;
 539 }
 540
 541 /*
 542  * allocate and initialize a new string from s, of length l, at offset o
 543  * Null terminated.
 544  */
 545 uchar *
 546     newstring(uchar * s, size_t l, size_t o)
 547 {
 548     uchar *ns = (uchar *) domalloc(l + o + 1);
 549
 550     ns[l + o] = '\0';
 551     return (uchar *) strncpy((char *) ns + o, (char *) s, l) - o;
 552 }
 553
 554 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */