wmake/src/wmkdependScanner.cpp

   1 /*---------------------------------*- C++ -*---------------------------------*\
   2   =========                 |
   3   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   4    \\    /   O peration     |
   5     \\  /    A nd           | Copyright (C) 2011 OpenFOAM Foundation
   6      \\/     M anipulation  |
   7 -------------------------------------------------------------------------------
   8 License
   9     This file is part of OpenFOAM.
  10
  11     OpenFOAM is free software: you can redistribute it and/or modify it
  12     under the terms of the GNU General Public License as published by
  13     the Free Software Foundation, either version 3 of the License, or
  14     (at your option) any later version.
  15
  16     OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
  17     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  18     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  19     for more details.
  20
  21     You should have received a copy of the GNU General Public License
  22     along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
  23
  24 @file wmkdependParser.atg
  25
  26 Description
  27     An attributed Coco/R grammar to parse C/C++, Fortran and Java files
  28     for include and import statements.
  29
  30 SourceFiles
  31     generated
  32
  33 \*---------------------------------------------------------------------------*/
  34 // This file was generated with Coco/R C++ (10 Mar 2010)
  35 // http://www.ssw.uni-linz.ac.at/coco/
  36 // with these defines:
  37 //     - FORCE_UTF8
  38
  39
  40 #include <sstream>
  41
  42 // io.h and fcntl are used to ensure binary read from streams on windows
  43 #ifdef _WIN32
  44 # include <io.h>
  45 # include <fcntl.h>
  46 #endif
  47
  48 #include <climits>
  49
  50 #include "wmkdependScanner.h"
  51
  52 // values for the file stream buffering
  53 #define MIN_BUFFER_LENGTH 1024        // 1KB
  54 #define MAX_BUFFER_LENGTH (64*MIN_BUFFER_LENGTH)   // 64KB
  55 // value for the heap management
  56 #define HEAP_BLOCK_SIZE   (64*1024)   // 64KB
  57
  58
  59 namespace wmake {
  60
  61 // * * * * * * * * * * * Miscellaneous String Routines * * * * * * * * * * * //
  62
  63 //
  64 // string handling, byte character
  65 //
  66
  67 std::string coco_stdString(const wchar_t* str)
  68 {
  69     return str ? coco_stdString(str, 0, wcslen(str)) : std::string();
  70 }
  71
  72
  73 std::string coco_stdString(const wchar_t* str, unsigned length)
  74 {
  75     return coco_stdString(str, 0, length);
  76 }
  77
  78
  79 std::string coco_stdString(const wchar_t* str, unsigned index, unsigned length)
  80 {
  81     const unsigned len = (str && *str) ? length : 0;
  82     std::string dst;
  83     dst.reserve(len);
  84
  85     for (unsigned i = 0; i < len; ++i)
  86     {
  87         dst += char(str[index+i] & 0xFF);
  88     }
  89
  90     return dst;
  91 }
  92
  93
  94 std::string coco_stdStringUTF8(const wchar_t* str)
  95 {
  96     return str ? coco_stdStringUTF8(str, 0, wcslen(str)) : std::string();
  97 }
  98
  99
 100 std::string coco_stdStringUTF8(const wchar_t* str, unsigned length)
 101 {
 102     return coco_stdStringUTF8(str, 0, length);
 103 }
 104
 105
 106 std::string coco_stdStringUTF8(const wchar_t* str, unsigned index, unsigned length)
 107 {
 108     const unsigned len = (str && *str) ? length : 0;
 109     std::string dst;
 110     dst.reserve(len);
 111
 112     for (unsigned i = 0; i < len; ++i)
 113     {
 114         wchar_t wc = str[index+i];
 115
 116         if (!(wc & ~0x0000007F))
 117         {
 118             // 0x00000000 - 0x0000007F [min. 8bit storage, 1-byte encoding)
 119             // 0aaaaaaa
 120             dst += char(wc);
 121         }
 122         else if (!(wc & ~0x000007FF))
 123         {
 124             // 0x00000080 - 0x000007FF [min. 16bit storage, 2-byte encoding]
 125             // 110bbbaa 10aaaaaa
 126             dst += char(0xC0 | ((wc >> 6) & 0x1F));
 127             dst += char(0x80 | ((wc) & 0x3F));
 128         }
 129         else if (!(wc & ~0x0000FFFF))
 130         {
 131             // 0x00000800 - 0x0000FFFF [min. 16bit storage, 3-byte encoding]
 132             // 1110bbbb 10bbbbaa 10aaaaaa
 133             dst += char(0xE0 | ((wc >> 12) & 0x0F));
 134             dst += char(0x80 | ((wc >> 6) & 0x3F));
 135             dst += char(0x80 | ((wc) & 0x3F));
 136         }
 137         else if (!(wc & ~0x001FFFFF))
 138         {
 139             // 0x00010000 - 0x001FFFFF [min. 24bit storage, 4-byte encoding]
 140             // 11110ccc 10ccbbbb 10bbbbaa 10aaaaaa
 141             dst += char(0xF0 | ((wc >> 18) & 0x07));
 142             dst += char(0x80 | ((wc >> 12) & 0x3F));
 143             dst += char(0x80 | ((wc >> 6) & 0x3F));
 144             dst += char(0x80 | ((wc) & 0x3F));
 145         }
 146 //
 147 // Not (yet) used - wchar_t storage is limited to 16bit on windows
 148 // This also corresponds to the unicode BMP (Basic Multilingual Plane)
 149 //
 150 //        else if (!(wc & ~0x03FFFFFF))
 151 //        {
 152 //            // 0x00200000 - 0x03FFFFFF [min. 32bit storage, 5-byte encoding]
 153 //            // 111110dd 10cccccc 10ccbbbb 10bbbbaa 10aaaaaa
 154 //            dst += char(0xF8 | ((wc >> 24) & 0x03));
 155 //            dst += char(0x80 | ((wc >> 18) & 0x3F));
 156 //            dst += char(0x80 | ((wc >> 12) & 0x3F));
 157 //            dst += char(0x80 | ((wc >> 6) & 0x3F));
 158 //            dst += char(0x80 | ((wc) & 0x3F));
 159 //        }
 160 //        else if (!(wc & ~0x7FFFFFFF))
 161 //        {
 162 //            // 0x04000000 - 0x7FFFFFFF [min. 32bit storage, 6-byte encoding]
 163 //            // 1111110d 10dddddd 10cccccc 10ccbbbb 10bbbbaa 10aaaaaa
 164 //            dst += char(0xFC | ((wc >> 30) & 0x01));
 165 //            dst += char(0x80 | ((wc >> 24) & 0x3F));
 166 //            dst += char(0x80 | ((wc >> 18) & 0x3F));
 167 //            dst += char(0x80 | ((wc >> 12) & 0x3F));
 168 //            dst += char(0x80 | ((wc >> 6) & 0x3F));
 169 //            dst += char(0x80 | ((wc) & 0x3F));
 170 //        }
 171 //
 172         else
 173         {
 174             // report anything unknown/invalid as replacement character U+FFFD
 175             dst += char(0xEF);
 176             dst += char(0xBF);
 177             dst += char(0xBD);
 178         }
 179     }
 180
 181     return dst;
 182 }
 183
 184
 185 // * * * * * * * * * * * *  End of String Routines * * * * * * * * * * * * * //
 186
 187
 188 Token::Token(wchar_t* value)
 189 :
 190     kind(0),
 191     pos(0),
 192     col(0),
 193     line(0),
 194     val(value),
 195     next(NULL)
 196 {}
 197
 198
 199 Token::~Token()
 200 {}
 201
 202
 203 int Token::length() const
 204 {
 205     return val ? wcslen(val) : 0;
 206 }
 207
 208
 209 // ----------------------------------------------------------------------------
 210 // Buffer Implementation
 211 // ----------------------------------------------------------------------------
 212
 213 Buffer::Buffer(Buffer* b)
 214 :
 215         buf(b->buf),
 216         bufCapacity(b->bufCapacity),
 217         bufLen(b->bufLen),
 218         bufPos(b->bufPos),
 219         bufStart(b->bufStart),
 220         fileLen(b->fileLen),
 221         cStream(b->cStream),
 222         stdStream(b->stdStream),
 223         isUserStream_(b->isUserStream_)
 224 {
 225         // avoid accidental deletion on any of these members
 226         b->buf = NULL;
 227         b->cStream = NULL;
 228         b->stdStream = NULL;
 229 }
 230
 231
 232 Buffer::Buffer(const char* chars, int len)
 233 :
 234         buf(new unsigned char[len]),
 235         bufCapacity(len),
 236         bufLen(len),
 237         bufPos(0),
 238         bufStart(0),
 239         fileLen(len),
 240         cStream(NULL),
 241         stdStream(NULL),
 242         isUserStream_(false)
 243 {
 244         memcpy(this->buf, chars, len*sizeof(char));
 245 }
 246
 247
 248 Buffer::Buffer(const unsigned char* chars, int len)
 249 :
 250         buf(new unsigned char[len]),
 251         bufCapacity(len),
 252         bufLen(len),
 253         bufPos(0),
 254         bufStart(0),
 255         fileLen(len),
 256         cStream(NULL),
 257         stdStream(NULL),
 258         isUserStream_(false)
 259 {
 260         memcpy(this->buf, chars, len*sizeof(char));
 261 }
 262
 263
 264 Buffer::Buffer(FILE* ifh, bool isUserStream)
 265 :
 266         buf(NULL),
 267         bufCapacity(0),
 268         bufLen(0),
 269         bufPos(0),
 270         bufStart(0),
 271         fileLen(0),
 272         cStream(ifh),
 273         stdStream(NULL),
 274         isUserStream_(isUserStream)
 275 {
 276 // ensure binary read on windows
 277 #ifdef _WIN32
 278         _setmode(_fileno(cStream), _O_BINARY);
 279 #endif
 280
 281         if (CanSeek())
 282         {
 283                 fseek(cStream, 0, SEEK_END);
 284                 fileLen = ftell(cStream);
 285                 fseek(cStream, 0, SEEK_SET);
 286                 bufLen = (fileLen < MAX_BUFFER_LENGTH) ? fileLen : MAX_BUFFER_LENGTH;
 287                 bufStart = INT_MAX; // nothing in the buffer so far
 288         }
 289
 290         bufCapacity = (bufLen > 0) ? bufLen : MIN_BUFFER_LENGTH;
 291         buf = new unsigned char[bufCapacity];
 292         if (fileLen > 0) SetPos(0);          // setup buffer to position 0 (start)
 293         else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
 294         if (bufLen == fileLen && CanSeek()) Close();
 295 }
 296
 297
 298 Buffer::Buffer(std::istream* istr, bool isUserStream)
 299 :
 300         buf(NULL),
 301         bufCapacity(0),
 302         bufLen(0),
 303         bufPos(0),
 304         bufStart(0),
 305         fileLen(0),
 306         cStream(NULL),
 307         stdStream(istr),
 308         isUserStream_(isUserStream)
 309 {
 310 #if _WIN32
 311         // TODO: ensure binary read on windows?
 312 #endif
 313 }
 314
 315
 316 Buffer::~Buffer()
 317 {
 318         Close();
 319         if (buf)
 320         {
 321                 delete[] buf;
 322                 buf = NULL;
 323         }
 324 }
 325
 326
 327 void Buffer::Close()
 328 {
 329         if (!isUserStream_)
 330         {
 331                 if (cStream)
 332                 {
 333                         fclose(cStream);
 334                         cStream = NULL;
 335                 }
 336                 if (stdStream)
 337                 {
 338                         delete stdStream;
 339                         stdStream = 0;
 340                 }
 341         }
 342 }
 343
 344
 345 int Buffer::Read()
 346 {
 347         if (stdStream)
 348         {
 349                 int ch = stdStream->get();
 350                 if (stdStream->eof())
 351                 {
 352                         return EoF;
 353                 }
 354                 return ch;
 355         }
 356
 357         if (bufPos < bufLen) {
 358                 return buf[bufPos++];
 359         }
 360         else if (GetPos() < fileLen) {
 361                 SetPos(GetPos()); // shift buffer start to Pos
 362                 return buf[bufPos++];
 363         }
 364         else if (cStream && !CanSeek() && (ReadNextStreamChunk() > 0)) {
 365                 return buf[bufPos++];
 366         }
 367
 368         return EoF;
 369 }
 370
 371 bool Buffer::isUTF8() const
 372 {
 373         return false;
 374 }
 375
 376 int UTF8Buffer::Read()
 377 {
 378         int ch;
 379         do {
 380                 ch = Buffer::Read();
 381                 // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
 382         } while (ch != EoF && ch >= 128 && ((ch & 0xC0) != 0xC0));
 383         if (ch < 128 || ch == EoF) {
 384                 // nothing to do, first 127 chars are identical in ASCII and UTF8
 385                 // 0xxxxxxx or end of file character
 386         }
 387         else if ((ch & 0xF0) == 0xF0) {
 388                 // 0x00010000 - 0x001FFFFF [min. 24bit storage, 4-byte encoding]
 389                 // 11110ccc 10ccbbbb 10bbbbaa 10aaaaaa
 390                 // CAUTION: this should probably be disallowed since it overflows
 391                 // wchar_t on windows and overflows the max (0xFFFF) used here
 392                 int c1 = ch & 0x07; ch = Buffer::Read();
 393                 int c2 = ch & 0x3F; ch = Buffer::Read();
 394                 int c3 = ch & 0x3F; ch = Buffer::Read();
 395                 int c4 = ch & 0x3F;
 396                 ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
 397         }
 398         else if ((ch & 0xE0) == 0xE0) {
 399                 // 0x00000800 - 0x0000FFFF [min. 16bit storage, 3-byte encoding]
 400                 // 1110bbbb 10bbbbaa 10aaaaaa
 401                 int c1 = ch & 0x0F; ch = Buffer::Read();
 402                 int c2 = ch & 0x3F; ch = Buffer::Read();
 403                 int c3 = ch & 0x3F;
 404                 ch = (((c1 << 6) | c2) << 6) | c3;
 405         }
 406         else if ((ch & 0xC0) == 0xC0) {
 407                 // 0x00000080 - 0x000007FF [min. 16bit storage, 2-byte encoding]
 408                 // 110bbbaa 10aaaaaa
 409                 int c1 = ch & 0x1F; ch = Buffer::Read();
 410                 int c2 = ch & 0x3F;
 411                 ch = (c1 << 6) | c2;
 412         }
 413         return ch;
 414 }
 415
 416
 417 bool UTF8Buffer::isUTF8() const
 418 {
 419         return true;
 420 }
 421
 422
 423 int Buffer::Peek()
 424 {
 425         int curPos = GetPos();
 426         int ch = Read();
 427         SetPos(curPos);
 428         return ch;
 429 }
 430
 431
 432 int Buffer::GetPos() const
 433 {
 434         if (stdStream)
 435         {
 436                 return stdStream->tellg();
 437         }
 438
 439         return bufPos + bufStart;
 440 }
 441
 442
 443 void Buffer::SetPos(int value)
 444 {
 445         if (stdStream)
 446         {
 447                 stdStream->seekg(value, std::ios::beg);
 448                 return;
 449         }
 450
 451         if ((value >= fileLen) && cStream && !CanSeek())
 452         {
 453                 // Wanted position is after buffer and the stream
 454                 // is not seek-able e.g. network or console,
 455                 // thus we have to read the stream manually till
 456                 // the wanted position is in sight.
 457                 while ((value >= fileLen) && (ReadNextStreamChunk() > 0))
 458                 {}
 459         }
 460
 461         if ((value < 0) || (value > fileLen))
 462         {
 463                 fwprintf(stderr, L"--- buffer out of bounds access, position: %d\n", value);
 464                 ::exit(1);
 465         }
 466
 467         if ((value >= bufStart) && (value < (bufStart + bufLen))) // already in buffer
 468         {
 469                 bufPos = value - bufStart;
 470         }
 471         else if (cStream) // must be swapped in
 472         {
 473                 fseek(cStream, value, SEEK_SET);
 474                 bufLen = fread(buf, sizeof(char), bufCapacity, cStream);
 475                 bufStart = value; bufPos = 0;
 476         }
 477         else
 478         {
 479                 bufPos = fileLen - bufStart; // make Pos return fileLen
 480         }
 481 }
 482
 483
 484 //
 485 // Read the next chunk of bytes from the stream, increases the buffer
 486 // if needed and updates the fields fileLen and bufLen.
 487 // Returns the number of bytes read.
 488 //
 489 int Buffer::ReadNextStreamChunk()
 490 {
 491         int freeLen = bufCapacity - bufLen;
 492         if (freeLen == 0)
 493         {
 494                 // in the case of a growing input stream
 495                 // we can neither seek in the stream, nor can we
 496                 // foresee the maximum length, thus we must adapt
 497                 // the buffer size on demand.
 498                 bufCapacity = bufLen * 2;
 499                 unsigned char *newBuf = new unsigned char[bufCapacity];
 500                 memcpy(newBuf, buf, bufLen*sizeof(char));
 501                 delete[] buf;
 502                 buf = newBuf;
 503                 freeLen = bufLen;
 504         }
 505         int read = fread(buf + bufLen, sizeof(char), freeLen, cStream);
 506         if (read > 0)
 507         {
 508                 fileLen = bufLen = (bufLen + read);
 509                 return read;
 510         }
 511         // end of stream reached
 512         return 0;
 513 }
 514
 515
 516 bool Buffer::CanSeek() const
 517 {
 518         return cStream && (ftell(cStream) != -1);
 519 }
 520
 521 // ----------------------------------------------------------------------------
 522 // Scanner Implementation
 523 // ----------------------------------------------------------------------------
 524
 525 Scanner::Scanner(const char* buf, int len)
 526 :
 527         buffer(new Buffer(buf, len))
 528 {
 529         Init();
 530 }
 531
 532
 533 Scanner::Scanner(const unsigned char* buf, int len)
 534 :
 535         buffer(new Buffer(buf, len))
 536 {
 537         Init();
 538 }
 539
 540
 541 Scanner::Scanner(FILE* ifh)
 542 :
 543         buffer(new Buffer(ifh, true))
 544 {
 545         Init();
 546 }
 547
 548
 549 #ifdef _WIN32
 550 Scanner::Scanner(const std::wstring& fileName)
 551 {
 552         FILE* ifh;
 553
 554         if ((ifh = _wfopen(fileName.c_str(), L"rb")) == NULL)
 555         {
 556                 fwprintf(stderr, L"--- Cannot open file %ls\n", fileName.c_str());
 557                 ::exit(1);
 558         }
 559         buffer = new Buffer(ifh, false);
 560         Init();
 561 }
 562 #endif
 563
 564
 565 Scanner::Scanner(const std::string& fileName)
 566 {
 567         FILE* ifh;
 568         if ((ifh = fopen(fileName.c_str(), "rb")) == NULL)
 569         {
 570                 fwprintf(stderr, L"--- Cannot open file %s\n", fileName.c_str());
 571                 ::exit(1);
 572         }
 573         buffer = new Buffer(ifh, false);
 574         Init();
 575 }
 576
 577
 578 Scanner::Scanner(std::istream& istr)
 579 :
 580         buffer(new Buffer(&istr, true))
 581 {
 582         Init();
 583 }
 584
 585
 586 Scanner::~Scanner()
 587 {
 588         char* cur = reinterpret_cast<char*>(firstHeap);
 589
 590 #ifdef COCO_DEBUG_HEAP
 591         fwprintf(stderr, L"~Scanner:\n");
 592 #endif
 593
 594         while (cur)
 595         {
 596                 cur = *(reinterpret_cast<char**>(cur + HEAP_BLOCK_SIZE));
 597                 free(firstHeap);
 598 #ifdef COCO_DEBUG_HEAP
 599                 fwprintf
 600                 (
 601                         stderr, L"    free %p -> %p\n",
 602                         firstHeap,
 603                         reinterpret_cast<char*>(firstHeap) + HEAP_BLOCK_SIZE
 604                 );
 605 #endif
 606                 firstHeap = cur;
 607         }
 608         delete[] tval;
 609         delete buffer;
 610 }
 611
 612
 613 void Scanner::Init()
 614 {
 615         for (int i = 36; i <= 36; ++i) start.set(i, 7);
 616         for (int i = 65; i <= 90; ++i) start.set(i, 7);
 617         for (int i = 95; i <= 95; ++i) start.set(i, 7);
 618         for (int i = 97; i <= 122; ++i) start.set(i, 7);
 619         start.set(34, 1);
 620         start.set(39, 4);
 621         start.set(35, 11);
 622         start.set(10, 12);
 623         start.set(59, 13);
 624         start.set(Buffer::EoF, -1);
 625
 626         keywords.set(L"include", 6);
 627         keywords.set(L"import", 8);
 628
 629         tvalLength = 128;
 630         tval = new wchar_t[tvalLength]; // text of current token
 631         tlen = 0;
 632         tval[tlen] = 0;
 633
 634         // HEAP_BLOCK_SIZE byte heap + pointer to next heap block
 635         heap = malloc(HEAP_BLOCK_SIZE + sizeof(void*));
 636         firstHeap = heap;
 637         heapEnd =
 638                 reinterpret_cast<void**>
 639                 (reinterpret_cast<char*>(heap) + HEAP_BLOCK_SIZE);
 640         *heapEnd = 0;
 641         heapTop = heap;
 642         if (sizeof(Token) > HEAP_BLOCK_SIZE)
 643         {
 644                 fwprintf(stderr, L"--- Too small HEAP_BLOCK_SIZE\n");
 645                 ::exit(1);
 646         }
 647 #ifdef COCO_DEBUG_HEAP
 648         fwprintf
 649         (
 650                 stderr, L"Scanner::init: firstHeap %p -> %p\n",
 651                 firstHeap,
 652                 reinterpret_cast<char*>(firstHeap) + HEAP_BLOCK_SIZE
 653         );
 654 #endif
 655
 656         pos = -1; line = 1; col = 0;
 657         oldEols = 0;
 658         NextCh();
 659         if (ch == 0xEF)   // check optional byte order mark for UTF-8
 660         {                 // Windows-specific magic
 661                 NextCh(); int ch1 = ch;
 662                 NextCh(); int ch2 = ch;
 663                 if (ch1 != 0xBB || ch2 != 0xBF)
 664                 {
 665                         fwprintf(stderr, L"Illegal byte order mark at start of file");
 666                         ::exit(1);
 667                 }
 668                 Buffer *oldBuf = buffer;
 669                 buffer = new UTF8Buffer(oldBuf); col = 0;
 670                 delete oldBuf; oldBuf = NULL;
 671                 NextCh();
 672         }
 673         else
 674         {
 675                 // FORCE_UTF8 was defined
 676                 // use UTF8Buffer without relying on a byte order mark.
 677                 Buffer *oldBuf = buffer;
 678                 buffer = new UTF8Buffer(oldBuf); col = 0;
 679                 delete oldBuf; oldBuf = NULL;
 680         }
 681
 682         pt = tokens = CreateToken(); // first token is a dummy
 683 }
 684
 685
 686 void Scanner::NextCh()
 687 {
 688         if (oldEols > 0)
 689         {
 690                 ch = EOL;
 691                 oldEols--;
 692         }
 693         else
 694         {
 695                 pos = buffer->GetPos();
 696                 ch = buffer->Read(); col++;
 697                 // replace isolated '\r' by '\n' in order to make
 698                 // eol handling uniform across Windows, Unix and Mac
 699                 if (ch == '\r' && buffer->Peek() != '\n') ch = EOL;
 700                 if (ch == EOL) { line++; col = 0; }
 701         }
 702 }
 703
 704
 705 void Scanner::AddCh()
 706 {
 707         if (tlen >= tvalLength)
 708         {
 709                 tvalLength *= 2;
 710                 wchar_t *newBuf = new wchar_t[tvalLength];
 711                 memcpy(newBuf, tval, tlen*sizeof(wchar_t));
 712                 delete[] tval;
 713                 tval = newBuf;
 714         }
 715         if (ch != Buffer::EoF)
 716         {
 717                 tval[tlen++] = ch;
 718                 NextCh();
 719         }
 720 }
 721
 722
 723
 724 bool Scanner::Comment0() {
 725         int level = 1, pos0 = pos, line0 = line, col0 = col;
 726         NextCh();
 727         if (ch == '/') {
 728                 NextCh();
 729                 while (true) {
 730                         if (ch == 10) {
 731                                 level--;
 732                                 if (level == 0) { oldEols = line - line0; NextCh(); return true; }
 733                                 NextCh();
 734                         } else if (ch == buffer->EoF) return false;
 735                         else NextCh();
 736                 }
 737         } else {
 738                 buffer->SetPos(pos0); NextCh(); line = line0; col = col0;
 739         }
 740         return false;
 741 }
 742
 743 bool Scanner::Comment1() {
 744         int level = 1, pos0 = pos, line0 = line, col0 = col;
 745         NextCh();
 746         if (ch == '*') {
 747                 NextCh();
 748                 while (true) {
 749                         if (ch == '*') {
 750                                 NextCh();
 751                                 if (ch == '/') {
 752                                         level--;
 753                                         if (level == 0) { oldEols = line - line0; NextCh(); return true; }
 754                                         NextCh();
 755                                 }
 756                         } else if (ch == '/') {
 757                                 NextCh();
 758                                 if (ch == '*') {
 759                                         level++; NextCh();
 760                                 }
 761                         } else if (ch == buffer->EoF) return false;
 762                         else NextCh();
 763                 }
 764         } else {
 765                 buffer->SetPos(pos0); NextCh(); line = line0; col = col0;
 766         }
 767         return false;
 768 }
 769
 770 void Scanner::CreateHeapBlock()
 771 {
 772         char* cur = reinterpret_cast<char*>(firstHeap);
 773
 774 #ifdef COCO_DEBUG_HEAP
 775         fwprintf(stderr, L"CreateHeapBlock: tokens %p\n", tokens);
 776 #endif
 777
 778         // release unused blocks
 779         while
 780         (
 781             (reinterpret_cast<char*>(tokens) < cur)
 782          || (reinterpret_cast<char*>(tokens) > (cur + HEAP_BLOCK_SIZE))
 783         )
 784         {
 785                 cur = *(reinterpret_cast<char**>(cur + HEAP_BLOCK_SIZE));
 786 #ifdef COCO_DEBUG_HEAP
 787                 fwprintf
 788                 (
 789                         stderr, L"    free %p -> %p\n",
 790                         firstHeap,
 791                         reinterpret_cast<char*>(firstHeap) + HEAP_BLOCK_SIZE
 792                 );
 793 #endif
 794                 free(firstHeap);
 795                 firstHeap = cur;
 796         }
 797
 798         // HEAP_BLOCK_SIZE byte heap + pointer to next heap block
 799         void* newHeap = malloc(HEAP_BLOCK_SIZE + sizeof(void*));
 800         *heapEnd = newHeap;
 801         heapEnd =
 802                 reinterpret_cast<void**>
 803                 (reinterpret_cast<char*>(newHeap) + HEAP_BLOCK_SIZE);
 804         *heapEnd = 0;
 805         heap = newHeap;
 806         heapTop = heap;
 807 #ifdef COCO_DEBUG_HEAP
 808         fwprintf
 809         (
 810                 stderr, L"    malloc %p -> %p\n",
 811                 newHeap,
 812                 reinterpret_cast<char*>(newHeap) + HEAP_BLOCK_SIZE
 813         );
 814 #endif
 815 }
 816
 817
 818 Token* Scanner::CreateToken()
 819 {
 820         const int reqMem = sizeof(Token);
 821         if
 822         (
 823             (reinterpret_cast<char*>(heapTop) + reqMem)
 824          >= reinterpret_cast<char*>(heapEnd)
 825         )
 826         {
 827                 CreateHeapBlock();
 828         }
 829         // token 'occupies' heap starting at heapTop
 830         Token* tok = reinterpret_cast<Token*>(heapTop);
 831         // increment past this part of the heap, which is now used
 832         heapTop =
 833                 reinterpret_cast<void*>
 834                 (reinterpret_cast<char*>(heapTop) + reqMem);
 835         tok->val  = NULL;
 836         tok->next = NULL;
 837         return tok;
 838 }
 839
 840
 841 void Scanner::AppendVal(Token* tok)
 842 {
 843         const int reqMem = (tlen + 1) * sizeof(wchar_t);
 844         if
 845         (
 846             (reinterpret_cast<char*>(heapTop) + reqMem)
 847          >= reinterpret_cast<char*>(heapEnd)
 848         )
 849         {
 850                 if (reqMem > HEAP_BLOCK_SIZE)
 851                 {
 852                         fwprintf(stderr, L"--- Too long token value\n");
 853                         ::exit(1);
 854                 }
 855                 CreateHeapBlock();
 856         }
 857
 858         // add text value from heap
 859         tok->val = reinterpret_cast<wchar_t*>(heapTop);
 860
 861         // increment past this part of the heap, which is now used
 862         heapTop =
 863                 reinterpret_cast<void*>
 864                 (reinterpret_cast<char*>(heapTop) + reqMem);
 865
 866         // copy the currently parsed tval into the token
 867         wcsncpy(tok->val, tval, tlen);
 868         tok->val[tlen] = '\0';
 869 }
 870
 871
 872 Token* Scanner::NextToken()
 873 {
 874         while
 875         (
 876             ch == ' '
 877          || ch == 9
 878         ) NextCh();
 879         if ((ch == '/' && Comment0()) || (ch == '/' && Comment1())) return NextToken();
 880         int recKind = noSym;
 881         int recEnd = pos;
 882         t = CreateToken();
 883         t->pos = pos; t->col = col; t->line = line;
 884         int state = start.state(ch);
 885         tlen = 0; AddCh();
 886
 887         switch (state)
 888         {
 889                 case -1: { t->kind = eofSym; break; } // NextCh already done
 890                 case 0: {
 891                         case_0:
 892                         if (recKind != noSym) {
 893                                 tlen = recEnd - t->pos;
 894                                 SetScannerBehindT();
 895                         }
 896                         t->kind = recKind; break;
 897                 } // NextCh already done
 898                 case 1:
 899                         case_1:
 900                         if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= '!') || (ch >= '#' && ch <= '[') || (ch >= ']' && ch <= 65535)) {AddCh(); goto case_1;}
 901                         else if (ch == '"') {AddCh(); goto case_3;}
 902                         else if (ch == 92) {AddCh(); goto case_2;}
 903                         else {goto case_0;}
 904                 case 2:
 905                         case_2:
 906                         if ((ch >= ' ' && ch <= '~')) {AddCh(); goto case_1;}
 907                         else {goto case_0;}
 908                 case 3:
 909                         case_3:
 910                         {t->kind = 1; break;}
 911                 case 4:
 912                         case_4:
 913                         if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= '!') || (ch >= '#' && ch <= '&') || (ch >= '(' && ch <= '[') || (ch >= ']' && ch <= 65535)) {AddCh(); goto case_4;}
 914                         else if (ch == 39) {AddCh(); goto case_8;}
 915                         else if (ch == 92) {AddCh(); goto case_5;}
 916                         else {goto case_0;}
 917                 case 5:
 918                         case_5:
 919                         if ((ch >= ' ' && ch <= '~')) {AddCh(); goto case_4;}
 920                         else {goto case_0;}
 921                 case 6:
 922                         case_6:
 923                         {t->kind = 4; break;}
 924                 case 7:
 925                         case_7:
 926                         recEnd = pos; recKind = 3;
 927                         if (ch == '$' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || ch == '_' || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_7;}
 928                         else if (ch == '.') {AddCh(); goto case_9;}
 929                         else {t->kind = 3; std::wstring literal(tval, tlen); t->kind = keywords.get(literal, t->kind); break;}
 930                 case 8:
 931                         case_8:
 932                         recEnd = pos; recKind = 2;
 933                         if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= '!') || (ch >= '#' && ch <= '&') || (ch >= '(' && ch <= '[') || (ch >= ']' && ch <= 65535)) {AddCh(); goto case_4;}
 934                         else if (ch == 39) {AddCh(); goto case_8;}
 935                         else if (ch == 92) {AddCh(); goto case_5;}
 936                         else {t->kind = 2; break;}
 937                 case 9:
 938                         case_9:
 939                         if (ch == '$' || (ch >= 'A' && ch <= 'Z') || ch == '_' || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_10;}
 940                         else if (ch == '*') {AddCh(); goto case_6;}
 941                         else {goto case_0;}
 942                 case 10:
 943                         case_10:
 944                         recEnd = pos; recKind = 3;
 945                         if (ch == '$' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || ch == '_' || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_10;}
 946                         else if (ch == '.') {AddCh(); goto case_9;}
 947                         else {t->kind = 3; std::wstring literal(tval, tlen); t->kind = keywords.get(literal, t->kind); break;}
 948                 case 11:
 949                         {t->kind = 5; break;}
 950                 case 12:
 951                         {t->kind = 7; break;}
 952                 case 13:
 953                         {t->kind = 9; break;}
 954         }
 955         AppendVal(t);
 956         return t;
 957 }
 958
 959
 960 void Scanner::SetScannerBehindT()
 961 {
 962         buffer->SetPos(t->pos);
 963         NextCh();
 964         line = t->line; col = t->col;
 965         for (int i = 0; i < tlen; i++) NextCh();
 966 }
 967
 968
 969 // get the next token (possibly a token already seen during peeking)
 970 Token* Scanner::Scan()
 971 {
 972         if (tokens->next == NULL) {
 973                 pt = tokens = NextToken();
 974         }
 975         else {
 976                 pt = tokens = tokens->next;
 977         }
 978         return tokens;
 979 }
 980
 981
 982 // peek for the next token, ignore pragmas
 983 Token* Scanner::Peek()
 984 {
 985         do
 986         {
 987                 if (pt->next == NULL)
 988                 {
 989                         pt->next = NextToken();
 990                 }
 991                 pt = pt->next;
 992         } while (pt->kind > maxT);   // skip pragmas
 993
 994         return pt;
 995 }
 996
 997
 998 // make sure that peeking starts at the current scan position
 999 void Scanner::ResetPeek()
1000 {
1001         pt = tokens;
1002 }
1003
1004
1005 int Scanner::Line() const
1006 {
1007         return line;
1008 }
1009
1010
1011 void Scanner::Line(int lineNo)
1012 {
1013         line = lineNo;
1014 }
1015
1016
1017 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
1018
1019 } // End namespace
1020
1021 // ************************************************************************* //