1 /*---------------------------------*- C++ -*---------------------------------*\
3 \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
5 \\ / A nd | Copyright (C) 2011 OpenFOAM Foundation
7 -------------------------------------------------------------------------------
9 This file is part of OpenFOAM.
11 OpenFOAM is free software: you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 You should have received a copy of the GNU General Public License
22 along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
24 @file wmkdependParser.atg
27 An attributed Coco/R grammar to parse C/C++, Fortran and Java files
28 for include and import statements.
33 \*---------------------------------------------------------------------------*/
34 // This file was generated with Coco/R C++ (10 Mar 2010)
35 // http://www.ssw.uni-linz.ac.at/coco/
36 // with these defines:
42 // io.h and fcntl are used to ensure binary read from streams on windows
50 #include "wmkdependScanner.h"
52 // values for the file stream buffering
53 #define MIN_BUFFER_LENGTH 1024 // 1KB
54 #define MAX_BUFFER_LENGTH (64*MIN_BUFFER_LENGTH) // 64KB
55 // value for the heap management
56 #define HEAP_BLOCK_SIZE (64*1024) // 64KB
61 // * * * * * * * * * * * Miscellaneous String Routines * * * * * * * * * * * //
64 // string handling, byte character
67 std::string
coco_stdString(const wchar_t* str
)
69 return str
? coco_stdString(str
, 0, wcslen(str
)) : std::string();
73 std::string
coco_stdString(const wchar_t* str
, unsigned length
)
75 return coco_stdString(str
, 0, length
);
// Convert a range of a wide string to a byte string.
//
// str     wide-character source; a null pointer or a string whose first
//         character is NUL produces an empty result
// index   offset of the first character to convert
// length  number of characters to convert, starting at 'index'
//
// Every character is reduced to its low byte (value & 0xFF); no range
// check is made against the end of the string.
std::string coco_stdString(const wchar_t* str, unsigned index, unsigned length)
{
    std::string bytes;

    // nothing to convert for a null pointer or a string starting with NUL
    if (!str || !*str)
    {
        return bytes;
    }

    bytes.reserve(length);

    unsigned at = index;
    for (unsigned remaining = length; remaining > 0; --remaining, ++at)
    {
        bytes += char(str[at] & 0xFF);
    }

    return bytes;
}
94 std::string
coco_stdStringUTF8(const wchar_t* str
)
96 return str
? coco_stdStringUTF8(str
, 0, wcslen(str
)) : std::string();
100 std::string
coco_stdStringUTF8(const wchar_t* str
, unsigned length
)
102 return coco_stdStringUTF8(str
, 0, length
);
// Convert a range of a wide-character string to a UTF-8 encoded byte string.
//
// str     wide-character source; a null pointer or a string whose first
//         character is NUL produces an empty result
// index   offset of the first character to convert
// length  number of wide characters to convert, starting at 'index'; the
//         caller must make sure that the range lies within the string
//
// Returns the UTF-8 bytes for the range. Anything without a valid UTF-8
// form (unpaired surrogates, values above U+10FFFF, negative values) is
// reported as the replacement character U+FFFD.
//
// A UTF-16 surrogate pair (as found where wchar_t is 16 bits wide) is
// combined into one code point and written as a single 4-byte sequence
// rather than as two 3-byte sequences, which would not be valid UTF-8.
std::string coco_stdStringUTF8(const wchar_t* str, unsigned index, unsigned length)
{
    const unsigned len = (str && *str) ? length : 0;
    std::string dst;
    dst.reserve(len);

    for (unsigned i = 0; i < len; ++i)
    {
        // unsigned and at least 32 bits wide, whatever wchar_t looks like;
        // a negative wchar_t becomes a huge value and is rejected below
        unsigned long cp = static_cast<unsigned long>(str[index+i]);

        // high surrogate directly followed by a low surrogate:
        // one supplementary code point stored as two UTF-16 units
        if (cp >= 0xD800 && cp <= 0xDBFF && (i + 1) < len)
        {
            const unsigned long low =
                static_cast<unsigned long>(str[index+i+1]);

            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                ++i;    // the low surrogate has been consumed as well
            }
        }

        if (cp < 0x80)
        {
            // 0x00000000 - 0x0000007F [min. 8bit storage, 1-byte encoding]
            // 0aaaaaaa
            dst += char(cp);
        }
        else if (cp < 0x800)
        {
            // 0x00000080 - 0x000007FF [min. 16bit storage, 2-byte encoding]
            // 110bbbaa 10aaaaaa
            dst += char(0xC0 | ((cp >> 6) & 0x1F));
            dst += char(0x80 | ((cp) & 0x3F));
        }
        else if (cp >= 0xD800 && cp <= 0xDFFF)
        {
            // unpaired surrogate: not a character, must not be encoded
            dst += char(0xEF);
            dst += char(0xBF);
            dst += char(0xBD);
        }
        else if (cp < 0x10000)
        {
            // 0x00000800 - 0x0000FFFF [min. 16bit storage, 3-byte encoding]
            // 1110bbbb 10bbbbaa 10aaaaaa
            dst += char(0xE0 | ((cp >> 12) & 0x0F));
            dst += char(0x80 | ((cp >> 6) & 0x3F));
            dst += char(0x80 | ((cp) & 0x3F));
        }
        else if (cp <= 0x10FFFF)
        {
            // 0x00010000 - 0x0010FFFF [min. 24bit storage, 4-byte encoding]
            // 11110ccc 10ccbbbb 10bbbbaa 10aaaaaa
            dst += char(0xF0 | ((cp >> 18) & 0x07));
            dst += char(0x80 | ((cp >> 12) & 0x3F));
            dst += char(0x80 | ((cp >> 6) & 0x3F));
            dst += char(0x80 | ((cp) & 0x3F));
        }
        else
        {
            // UTF-8 ends at U+10FFFF (RFC 3629); the historic 5- and 6-byte
            // forms are not valid, so report anything larger (or otherwise
            // unknown/invalid) as replacement character U+FFFD
            dst += char(0xEF);
            dst += char(0xBF);
            dst += char(0xBD);
        }
    }

    return dst;
}
185 // * * * * * * * * * * * * End of String Routines * * * * * * * * * * * * * //
188 Token::Token(wchar_t* value
)
203 int Token::length() const
205 return val
? wcslen(val
) : 0;
209 // ----------------------------------------------------------------------------
210 // Buffer Implementation
211 // ----------------------------------------------------------------------------
213 Buffer::Buffer(Buffer
* b
)
216 bufCapacity(b
->bufCapacity
),
219 bufStart(b
->bufStart
),
222 stdStream(b
->stdStream
),
223 isUserStream_(b
->isUserStream_
)
225 // avoid accidental deletion on any of these members
232 Buffer::Buffer(const char* chars
, int len
)
234 buf(new unsigned char[len
]),
244 memcpy(this->buf
, chars
, len
*sizeof(char));
248 Buffer::Buffer(const unsigned char* chars
, int len
)
250 buf(new unsigned char[len
]),
260 memcpy(this->buf
, chars
, len
*sizeof(char));
// Construct a buffer that reads from a C stream.
//
// Steps visible in this block:
//  - the stream is switched to binary mode with _setmode() (the comment
//    marks this as a windows measure)
//  - the stream length is measured with fseek(SEEK_END) / ftell() and the
//    stream is rewound with fseek(SEEK_SET)
//  - bufLen is fileLen, limited to MAX_BUFFER_LENGTH
//  - bufCapacity is bufLen, or MIN_BUFFER_LENGTH when bufLen is not positive
//  - a non-empty file is positioned at offset 0 through SetPos(0)
//  - Close() is called when bufLen equals fileLen and CanSeek() is true
//
// NOTE(review): the ftell() result is stored in fileLen without a check for
// its -1 error value; the fseek() results are not checked either.
// NOTE(review): the initialiser list is only partly visible in this view;
// presumably cStream is initialised from 'ifh' there - confirm.
264 Buffer::Buffer(FILE* ifh
, bool isUserStream
)
274 isUserStream_(isUserStream
)
276 // ensure binary read on windows
278 _setmode(_fileno(cStream
), _O_BINARY
);
283 fseek(cStream
, 0, SEEK_END
);
284 fileLen
= ftell(cStream
);
285 fseek(cStream
, 0, SEEK_SET
);
286 bufLen
= (fileLen
< MAX_BUFFER_LENGTH
) ? fileLen
: MAX_BUFFER_LENGTH
;
287 bufStart
= INT_MAX
; // nothing in the buffer so far
290 bufCapacity
= (bufLen
> 0) ? bufLen
: MIN_BUFFER_LENGTH
;
291 buf
= new unsigned char[bufCapacity
];
292 if (fileLen
> 0) SetPos(0); // setup buffer to position 0 (start)
293 else bufPos
= 0; // index 0 is already after the file, thus Pos = 0 is invalid
294 if (bufLen
== fileLen
&& CanSeek()) Close();
298 Buffer::Buffer(std::istream
* istr
, bool isUserStream
)
308 isUserStream_(isUserStream
)
311 // TODO: ensure binary read on windows?
349 int ch
= stdStream
->get();
350 if (stdStream
->eof())
357 if (bufPos
< bufLen
) {
358 return buf
[bufPos
++];
360 else if (GetPos() < fileLen
) {
361 SetPos(GetPos()); // shift buffer start to Pos
362 return buf
[bufPos
++];
364 else if (cStream
&& !CanSeek() && (ReadNextStreamChunk() > 0)) {
365 return buf
[bufPos
++];
371 bool Buffer::isUTF8() const
376 int UTF8Buffer::Read()
381 // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
382 } while (ch
!= EoF
&& ch
>= 128 && ((ch
& 0xC0) != 0xC0));
383 if (ch
< 128 || ch
== EoF
) {
384 // nothing to do, first 127 chars are identical in ASCII and UTF8
385 // 0xxxxxxx or end of file character
387 else if ((ch
& 0xF0) == 0xF0) {
388 // 0x00010000 - 0x001FFFFF [min. 24bit storage, 4-byte encoding]
389 // 11110ccc 10ccbbbb 10bbbbaa 10aaaaaa
390 // CAUTION: this should probably be disallowed since it overflows
391 // wchar_t on windows and overflows the max (0xFFFF) used here
392 int c1
= ch
& 0x07; ch
= Buffer::Read();
393 int c2
= ch
& 0x3F; ch
= Buffer::Read();
394 int c3
= ch
& 0x3F; ch
= Buffer::Read();
396 ch
= (((((c1
<< 6) | c2
) << 6) | c3
) << 6) | c4
;
398 else if ((ch
& 0xE0) == 0xE0) {
399 // 0x00000800 - 0x0000FFFF [min. 16bit storage, 3-byte encoding]
400 // 1110bbbb 10bbbbaa 10aaaaaa
401 int c1
= ch
& 0x0F; ch
= Buffer::Read();
402 int c2
= ch
& 0x3F; ch
= Buffer::Read();
404 ch
= (((c1
<< 6) | c2
) << 6) | c3
;
406 else if ((ch
& 0xC0) == 0xC0) {
407 // 0x00000080 - 0x000007FF [min. 16bit storage, 2-byte encoding]
409 int c1
= ch
& 0x1F; ch
= Buffer::Read();
417 bool UTF8Buffer::isUTF8() const
425 int curPos
= GetPos();
432 int Buffer::GetPos() const
436 return stdStream
->tellg();
439 return bufPos
+ bufStart
;
// Move the read position to the absolute offset 'value'.
//
// Steps visible in this block:
//  - stdStream->seekg(value, std::ios::beg) repositions a C++ stream
//    (NOTE(review): the condition guarding this call is not visible here)
//  - for a C stream that cannot seek, further chunks are read until 'value'
//    lies below fileLen or ReadNextStreamChunk() delivers nothing more
//  - a value below 0 or above fileLen is reported on stderr
//  - a value inside [bufStart, bufStart + bufLen) only adjusts bufPos
//  - otherwise, with a C stream, the buffer is refilled starting at 'value'
//    by fseek() and fread(), and bufStart/bufPos are reset accordingly
//  - otherwise bufPos is set to fileLen - bufStart
//
// NOTE(review): the results of fseek() are not checked, and the size_t
// result of fread() is stored directly in bufLen.
443 void Buffer::SetPos(int value
)
447 stdStream
->seekg(value
, std::ios::beg
);
451 if ((value
>= fileLen
) && cStream
&& !CanSeek())
453 // Wanted position is after buffer and the stream
454 // is not seek-able e.g. network or console,
455 // thus we have to read the stream manually till
456 // the wanted position is in sight.
457 while ((value
>= fileLen
) && (ReadNextStreamChunk() > 0))
461 if ((value
< 0) || (value
> fileLen
))
463 fwprintf(stderr
, L
"--- buffer out of bounds access, position: %d\n", value
);
467 if ((value
>= bufStart
) && (value
< (bufStart
+ bufLen
))) // already in buffer
469 bufPos
= value
- bufStart
;
471 else if (cStream
) // must be swapped in
473 fseek(cStream
, value
, SEEK_SET
);
474 bufLen
= fread(buf
, sizeof(char), bufCapacity
, cStream
);
475 bufStart
= value
; bufPos
= 0;
479 bufPos
= fileLen
- bufStart
; // make Pos return fileLen
485 // Read the next chunk of bytes from the stream, increases the buffer
486 // if needed and updates the fields fileLen and bufLen.
487 // Returns the number of bytes read.
// Append the next chunk of the C stream to the buffer.
//
// Steps visible in this block:
//  - freeLen is the unused part of the buffer (bufCapacity - bufLen)
//  - the capacity is set to twice bufLen, a new array of that size is
//    allocated and the first bufLen bytes are copied into it
//    (NOTE(review): the condition guarding this growth step and the release
//    of the old array are not visible in this view - presumably it runs
//    only when freeLen is 0; confirm)
//  - fread() stores up to freeLen bytes behind the current content
//  - fileLen and bufLen are both set to the old bufLen plus the bytes read
//
// NOTE(review): the local 'read' holds the size_t result of fread() as int.
489 int Buffer::ReadNextStreamChunk()
491 int freeLen
= bufCapacity
- bufLen
;
494 // in the case of a growing input stream
495 // we can neither seek in the stream, nor can we
496 // foresee the maximum length, thus we must adapt
497 // the buffer size on demand.
498 bufCapacity
= bufLen
* 2;
499 unsigned char *newBuf
= new unsigned char[bufCapacity
];
500 memcpy(newBuf
, buf
, bufLen
*sizeof(char));
505 int read
= fread(buf
+ bufLen
, sizeof(char), freeLen
, cStream
);
508 fileLen
= bufLen
= (bufLen
+ read
);
511 // end of stream reached
516 bool Buffer::CanSeek() const
518 return cStream
&& (ftell(cStream
) != -1);
521 // ----------------------------------------------------------------------------
522 // Scanner Implementation
523 // ----------------------------------------------------------------------------
525 Scanner::Scanner(const char* buf
, int len
)
527 buffer(new Buffer(buf
, len
))
533 Scanner::Scanner(const unsigned char* buf
, int len
)
535 buffer(new Buffer(buf
, len
))
541 Scanner::Scanner(FILE* ifh
)
543 buffer(new Buffer(ifh
, true))
550 Scanner::Scanner(const std::wstring
& fileName
)
554 if ((ifh
= _wfopen(fileName
.c_str(), L
"rb")) == NULL
)
556 fwprintf(stderr
, L
"--- Cannot open file %ls\n", fileName
.c_str());
559 buffer
= new Buffer(ifh
, false);
565 Scanner::Scanner(const std::string
& fileName
)
568 if ((ifh
= fopen(fileName
.c_str(), "rb")) == NULL
)
570 fwprintf(stderr
, L
"--- Cannot open file %s\n", fileName
.c_str());
573 buffer
= new Buffer(ifh
, false);
578 Scanner::Scanner(std::istream
& istr
)
580 buffer(new Buffer(&istr
, true))
588 char* cur
= reinterpret_cast<char*>(firstHeap
);
590 #ifdef COCO_DEBUG_HEAP
591 fwprintf(stderr
, L
"~Scanner:\n");
596 cur
= *(reinterpret_cast<char**>(cur
+ HEAP_BLOCK_SIZE
));
598 #ifdef COCO_DEBUG_HEAP
601 stderr
, L
" free %p -> %p\n",
603 reinterpret_cast<char*>(firstHeap
) + HEAP_BLOCK_SIZE
615 for (int i
= 36; i
<= 36; ++i
) start
.set(i
, 7);
616 for (int i
= 65; i
<= 90; ++i
) start
.set(i
, 7);
617 for (int i
= 95; i
<= 95; ++i
) start
.set(i
, 7);
618 for (int i
= 97; i
<= 122; ++i
) start
.set(i
, 7);
624 start
.set(Buffer::EoF
, -1);
626 keywords
.set(L
"include", 6);
627 keywords
.set(L
"import", 8);
630 tval
= new wchar_t[tvalLength
]; // text of current token
634 // HEAP_BLOCK_SIZE byte heap + pointer to next heap block
635 heap
= malloc(HEAP_BLOCK_SIZE
+ sizeof(void*));
638 reinterpret_cast<void**>
639 (reinterpret_cast<char*>(heap
) + HEAP_BLOCK_SIZE
);
642 if (sizeof(Token
) > HEAP_BLOCK_SIZE
)
644 fwprintf(stderr
, L
"--- Too small HEAP_BLOCK_SIZE\n");
647 #ifdef COCO_DEBUG_HEAP
650 stderr
, L
"Scanner::init: firstHeap %p -> %p\n",
652 reinterpret_cast<char*>(firstHeap
) + HEAP_BLOCK_SIZE
656 pos
= -1; line
= 1; col
= 0;
659 if (ch
== 0xEF) // check optional byte order mark for UTF-8
660 { // Windows-specific magic
661 NextCh(); int ch1
= ch
;
662 NextCh(); int ch2
= ch
;
663 if (ch1
!= 0xBB || ch2
!= 0xBF)
665 fwprintf(stderr
, L
"Illegal byte order mark at start of file");
668 Buffer
*oldBuf
= buffer
;
669 buffer
= new UTF8Buffer(oldBuf
); col
= 0;
670 delete oldBuf
; oldBuf
= NULL
;
675 // FORCE_UTF8 was defined
676 // use UTF8Buffer without relying on a byte order mark.
677 Buffer
*oldBuf
= buffer
;
678 buffer
= new UTF8Buffer(oldBuf
); col
= 0;
679 delete oldBuf
; oldBuf
= NULL
;
682 pt
= tokens
= CreateToken(); // first token is a dummy
686 void Scanner::NextCh()
695 pos
= buffer
->GetPos();
696 ch
= buffer
->Read(); col
++;
697 // replace isolated '\r' by '\n' in order to make
698 // eol handling uniform across Windows, Unix and Mac
699 if (ch
== '\r' && buffer
->Peek() != '\n') ch
= EOL
;
700 if (ch
== EOL
) { line
++; col
= 0; }
705 void Scanner::AddCh()
707 if (tlen
>= tvalLength
)
710 wchar_t *newBuf
= new wchar_t[tvalLength
];
711 memcpy(newBuf
, tval
, tlen
*sizeof(wchar_t));
715 if (ch
!= Buffer::EoF
)
724 bool Scanner::Comment0() {
725 int level
= 1, pos0
= pos
, line0
= line
, col0
= col
;
732 if (level
== 0) { oldEols
= line
- line0
; NextCh(); return true; }
734 } else if (ch
== buffer
->EoF
) return false;
738 buffer
->SetPos(pos0
); NextCh(); line
= line0
; col
= col0
;
743 bool Scanner::Comment1() {
744 int level
= 1, pos0
= pos
, line0
= line
, col0
= col
;
753 if (level
== 0) { oldEols
= line
- line0
; NextCh(); return true; }
756 } else if (ch
== '/') {
761 } else if (ch
== buffer
->EoF
) return false;
765 buffer
->SetPos(pos0
); NextCh(); line
= line0
; col
= col0
;
770 void Scanner::CreateHeapBlock()
772 char* cur
= reinterpret_cast<char*>(firstHeap
);
774 #ifdef COCO_DEBUG_HEAP
775 fwprintf(stderr
, L
"CreateHeapBlock: tokens %p\n", tokens
);
778 // release unused blocks
781 (reinterpret_cast<char*>(tokens
) < cur
)
782 || (reinterpret_cast<char*>(tokens
) > (cur
+ HEAP_BLOCK_SIZE
))
785 cur
= *(reinterpret_cast<char**>(cur
+ HEAP_BLOCK_SIZE
));
786 #ifdef COCO_DEBUG_HEAP
789 stderr
, L
" free %p -> %p\n",
791 reinterpret_cast<char*>(firstHeap
) + HEAP_BLOCK_SIZE
798 // HEAP_BLOCK_SIZE byte heap + pointer to next heap block
799 void* newHeap
= malloc(HEAP_BLOCK_SIZE
+ sizeof(void*));
802 reinterpret_cast<void**>
803 (reinterpret_cast<char*>(newHeap
) + HEAP_BLOCK_SIZE
);
807 #ifdef COCO_DEBUG_HEAP
810 stderr
, L
" malloc %p -> %p\n",
812 reinterpret_cast<char*>(newHeap
) + HEAP_BLOCK_SIZE
818 Token
* Scanner::CreateToken()
820 const int reqMem
= sizeof(Token
);
823 (reinterpret_cast<char*>(heapTop
) + reqMem
)
824 >= reinterpret_cast<char*>(heapEnd
)
829 // token 'occupies' heap starting at heapTop
830 Token
* tok
= reinterpret_cast<Token
*>(heapTop
);
831 // increment past this part of the heap, which is now used
833 reinterpret_cast<void*>
834 (reinterpret_cast<char*>(heapTop
) + reqMem
);
// Store the text of the token being scanned (tval, tlen characters) in the
// token 'tok', taking the storage from the scanner's own heap.
//
// Steps visible in this block:
//  - reqMem is the size in bytes of tlen characters plus a terminator
//  - heapTop + reqMem is compared against heapEnd
//    (NOTE(review): the statement run when the space does not suffice is
//    not visible in this view - presumably a new heap block is created;
//    confirm)
//  - a reqMem larger than HEAP_BLOCK_SIZE is reported on stderr
//  - tok->val is pointed at heapTop and the heap is advanced by reqMem
//  - tlen characters are copied with wcsncpy() and the text is terminated
//    explicitly, since wcsncpy() does not terminate when the source has at
//    least tlen characters
841 void Scanner::AppendVal(Token
* tok
)
843 const int reqMem
= (tlen
+ 1) * sizeof(wchar_t);
846 (reinterpret_cast<char*>(heapTop
) + reqMem
)
847 >= reinterpret_cast<char*>(heapEnd
)
850 if (reqMem
> HEAP_BLOCK_SIZE
)
852 fwprintf(stderr
, L
"--- Too long token value\n");
858 // add text value from heap
859 tok
->val
= reinterpret_cast<wchar_t*>(heapTop
);
861 // increment past this part of the heap, which is now used
863 reinterpret_cast<void*>
864 (reinterpret_cast<char*>(heapTop
) + reqMem
);
866 // copy the currently parsed tval into the token
867 wcsncpy(tok
->val
, tval
, tlen
);
868 tok
->val
[tlen
] = '\0';
872 Token
* Scanner::NextToken()
879 if ((ch
== '/' && Comment0()) || (ch
== '/' && Comment1())) return NextToken();
883 t
->pos
= pos
; t
->col
= col
; t
->line
= line
;
884 int state
= start
.state(ch
);
889 case -1: { t
->kind
= eofSym
; break; } // NextCh already done
892 if (recKind
!= noSym
) {
893 tlen
= recEnd
- t
->pos
;
896 t
->kind
= recKind
; break;
897 } // NextCh already done
900 if (ch
<= 9 || (ch
>= 11 && ch
<= 12) || (ch
>= 14 && ch
<= '!') || (ch
>= '#' && ch
<= '[') || (ch
>= ']' && ch
<= 65535)) {AddCh(); goto case_1
;}
901 else if (ch
== '"') {AddCh(); goto case_3
;}
902 else if (ch
== 92) {AddCh(); goto case_2
;}
906 if ((ch
>= ' ' && ch
<= '~')) {AddCh(); goto case_1
;}
910 {t
->kind
= 1; break;}
913 if (ch
<= 9 || (ch
>= 11 && ch
<= 12) || (ch
>= 14 && ch
<= '!') || (ch
>= '#' && ch
<= '&') || (ch
>= '(' && ch
<= '[') || (ch
>= ']' && ch
<= 65535)) {AddCh(); goto case_4
;}
914 else if (ch
== 39) {AddCh(); goto case_8
;}
915 else if (ch
== 92) {AddCh(); goto case_5
;}
919 if ((ch
>= ' ' && ch
<= '~')) {AddCh(); goto case_4
;}
923 {t
->kind
= 4; break;}
926 recEnd
= pos
; recKind
= 3;
927 if (ch
== '$' || (ch
>= '0' && ch
<= '9') || (ch
>= 'A' && ch
<= 'Z') || ch
== '_' || (ch
>= 'a' && ch
<= 'z')) {AddCh(); goto case_7
;}
928 else if (ch
== '.') {AddCh(); goto case_9
;}
929 else {t
->kind
= 3; std::wstring
literal(tval
, tlen
); t
->kind
= keywords
.get(literal
, t
->kind
); break;}
932 recEnd
= pos
; recKind
= 2;
933 if (ch
<= 9 || (ch
>= 11 && ch
<= 12) || (ch
>= 14 && ch
<= '!') || (ch
>= '#' && ch
<= '&') || (ch
>= '(' && ch
<= '[') || (ch
>= ']' && ch
<= 65535)) {AddCh(); goto case_4
;}
934 else if (ch
== 39) {AddCh(); goto case_8
;}
935 else if (ch
== 92) {AddCh(); goto case_5
;}
936 else {t
->kind
= 2; break;}
939 if (ch
== '$' || (ch
>= 'A' && ch
<= 'Z') || ch
== '_' || (ch
>= 'a' && ch
<= 'z')) {AddCh(); goto case_10
;}
940 else if (ch
== '*') {AddCh(); goto case_6
;}
944 recEnd
= pos
; recKind
= 3;
945 if (ch
== '$' || (ch
>= '0' && ch
<= '9') || (ch
>= 'A' && ch
<= 'Z') || ch
== '_' || (ch
>= 'a' && ch
<= 'z')) {AddCh(); goto case_10
;}
946 else if (ch
== '.') {AddCh(); goto case_9
;}
947 else {t
->kind
= 3; std::wstring
literal(tval
, tlen
); t
->kind
= keywords
.get(literal
, t
->kind
); break;}
949 {t
->kind
= 5; break;}
951 {t
->kind
= 7; break;}
953 {t
->kind
= 9; break;}
960 void Scanner::SetScannerBehindT()
962 buffer
->SetPos(t
->pos
);
964 line
= t
->line
; col
= t
->col
;
965 for (int i
= 0; i
< tlen
; i
++) NextCh();
969 // get the next token (possibly a token already seen during peeking)
970 Token
* Scanner::Scan()
972 if (tokens
->next
== NULL
) {
973 pt
= tokens
= NextToken();
976 pt
= tokens
= tokens
->next
;
982 // peek for the next token, ignore pragmas
983 Token
* Scanner::Peek()
987 if (pt
->next
== NULL
)
989 pt
->next
= NextToken();
992 } while (pt
->kind
> maxT
); // skip pragmas
998 // make sure that peeking starts at the current scan position
999 void Scanner::ResetPeek()
1005 int Scanner::Line() const
1011 void Scanner::Line(int lineNo
)
1017 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
1021 // ************************************************************************* //