use tokenlist for lexer parser.
[build-config.git] / src / config / lxrgmr-code / token / strtoken.h
blob018d20d7ed812c7ffdf41d3ecc0671e7cfd652bd
1 /************************************************************************
2 * $PROJECT Project
4 * (c) Copyright $YEAR, $COMPANY_EN. Inc., $CITY_EN, $COUNTRY_EN
5 * (c) Copyright $YEAR, $COMPANY_CN. Inc., $CITY_CN, $COUNTRY_CN
6 ************************************************************************
7 * filename: strtoken.h
8 * function: strtoken module.
9 * createdate: $DATE
10 * author: $AUTHOR
11 * note:
14 ************************************************************************/
15 /* Modify record */
16 /************************************************************************
17 * date: $DATE
18 * author: $AUTHOR
19 * note:
21 ************************************************************************/
23 #ifndef __STRTOKEN_H__
24 #define __STRTOKEN_H__
26 #include "type.h"
27 #include "../tokenlist/token.h"
28 #include "../lib/StrBuff.h"
29 #include "../lib/AQStack.h"
31 /* Type Definitions */
/* Number of chars that occupy the same storage as one char pointer. */
#define PTR_CHAR_CNT	(sizeof(char *))

/*
 * Index of the last of those chars.  Per the original note, this char is
 * examined to judge whether the storage holds a pointer value.
 */
#define CHAR_FLAG_BYTE	(PTR_CHAR_CNT - 1)
37 enum {
38 // STATE_INIT = 0,
39 STATE_CODE,
40 // STATE_CMD,
41 STATE_STRING,
42 STATE_TEXT_CMNT,
43 STATE_ENVAR,
44 STATE_EXPR,
45 STATE_CODEBLOCK,
48 enum {
49 EVT_ON_ENTER,
50 EVT_ON_EXIT,
/* Forward declaration; the struct body appears further down in this header. */
typedef struct __tag_PARSER_DESC PARSER_DESC;

/*
 * Callback type stored in struct token_id (member `proc`).
 * It receives an event code, a token value, a string together with its
 * length, and a parser descriptor, and returns an int.
 * NOTE(review): `evt` is presumably EVT_ON_ENTER / EVT_ON_EXIT -- confirm
 * against the callers.
 */
typedef int (*TOKEN_ID_PROC)(int evt, int token, const char *str,
			     unsigned int len, PARSER_DESC *psrcfile);
57 struct token_id {
58 union {
59 /*
60 * normally, ptr is larger than 0xFFFFFF.
61 * but if it is opr-char-string, store string
62 * in ptr buffer, this will decrease memory cost.
64 char *name;
66 /* opr-char, it will decrease memory cost */
67 char chars[PTR_CHAR_CNT];
69 char *tokenstr;
70 int state;
71 int token;
72 union {
74 struct {
75 unsigned char flags;
76 unsigned char tokensetflags;
79 unsigned int flags;
80 struct {
81 unsigned short rsv;
82 /* TBD: put symbol_type to other */
83 unsigned short stype;
84 // enum symbol_type stype;
87 TOKEN_ID_PROC proc;
/* Size limits. */
#define MAX_TOKEN_ID_TBL_SIZE	256
#define MAX_RSVWD_LEN		32

#define MAX_TOKEN_NUM		128
#define MAX_STATE_NUM		128
/*
 * Token string types.  TYPE_MAX is one past the last real type; this header
 * uses it as the length of the per-type arrays in TOKEN_PROC_STATE_INFO.
 */
typedef enum {
	TYPE_DUMMY     = 0,
	TYPE_MASK      = 1,
	TYPE_WORD      = 2,	/* include multi-byte */
	TYPE_PUNCT     = 3,	/* pfx/pair/opr char */
	TYPE_SPACE     = 4,
	TYPE_NEWLINE   = 5,
	TYPE_EOF       = 6,
	TYPE_PFX       = 7,
	TYPE_PAIR      = 8,
	TYPE_BUFF_FULL = 9,	/* buffer is full */
	TYPE_MAX,		/* count of the types above; keep last */
} TOKEN_TYPE;
113 enum STATE_TOKEN_FLAGS {
114 TF_CODE_BEGIN,
115 TF_DOLLAR,
116 TF_FUNCTION,
117 TF_ASSIGN_EXPR,
/*
 * Bit-flag helpers for any object that has a `u32Flag` member (in this
 * header: TOKEN_PROC_STATE).  `state` is a pointer to the object and `flag`
 * is a bit mask.
 *
 *   SET_STATE_FLAG - turn the bits of `flag` on, leave the others untouched
 *   CLR_STATE_FLAG - turn the bits of `flag` off, leave the others untouched
 *   CHK_STATE_FLAG - non-zero when at least one bit of `flag` is on
 *
 * The previous definitions had the complement on the wrong macro: SET
 * OR-ed in ~(flag) and CLR AND-ed with (flag), so each one affected every
 * bit except the requested one.
 */
#define SET_STATE_FLAG(state, flag) ((state)->u32Flag |= (flag))
#define CLR_STATE_FLAG(state, flag) ((state)->u32Flag &= ~(flag))
#define CHK_STATE_FLAG(state, flag) ((state)->u32Flag & (flag))
/*
 * Enter sub-state `state` on parser `pfile`:
 *   1. push the current run-time state pointer on pfile->stStateStk,
 *   2. replace it with the result of state_alloc_init(pfile, state),
 *   3. reset paired_state of the new run-time state to 0.
 *
 * NOTE(review): the pushed pointer is cast to uint32; that looks lossy where
 * pointers are wider than 32 bits -- confirm against QStackIF_Push().
 * NOTE(review): the result of state_alloc_init() is dereferenced without a
 * NULL check -- confirm it cannot fail.
 */
#define STATE_SWITCH(pfile, state) \
	do { \
		QStackIF_Push(&(pfile)->stStateStk, \
			      (uint32)(pfile)->pstCurrTokenState); \
		(pfile)->pstCurrTokenState = state_alloc_init((pfile), (state)); \
		(pfile)->pstCurrTokenState->paired_state = 0; \
	} while (0)
/*
 * Disabled code, kept for reference (apparently once part of STATE_SWITCH):
 *
 *	int paired_token_tmp = (pfile)->pstCurrTokenState->paired_token;
 *	char *state_token_str_tmp = (pfile)->pstCurrTokenState->state_token_str;
 *
 *	(pfile)->pstCurrTokenState->paired_token = paired_token_tmp;
 *	(pfile)->pstCurrTokenState->state_token_str = state_token_str_tmp;
 *
 * It is written as a block comment on purpose: a line comment that ends in
 * a backslash is spliced with the following line.
 */

/*
 * Leave the current sub-state of parser `pfile`:
 *   1. hand the current run-time state to state_free(),
 *   2. pop the previous run-time state from pfile->stStateStk,
 *   3. if the popped pointer is non-NULL, make its pstStateInfo the
 *      parser's current pstStateInfo.
 */
#define STATE_RECOVERY(pfile) \
	do { \
		state_free((void *)(pfile)->pstCurrTokenState); \
		(pfile)->pstCurrTokenState = \
			(TOKEN_PROC_STATE *)QStackIF_Pop(&(pfile)->stStateStk); \
		if ((pfile)->pstCurrTokenState) { \
			(pfile)->pstStateInfo = \
				(pfile)->pstCurrTokenState->pstStateInfo; \
		} \
	} while (0)
/* Non-zero when the current run-time state of parser `pfile` has id `state`. */
#define STATE_CHK(pfile, state) \
	((state) == (pfile)->pstCurrTokenState->iStateId)

/* Same comparison, applied directly to a run-time state pointer. */
#define STATE_VCHK(pvstate, state) \
	((state) == (pvstate)->iStateId)
150 typedef struct __tag_TOKEN_PROC_STATE TOKEN_PROC_STATE;
151 typedef int (*TOKEN_PROC) (char *str, TOKEN_TYPE type, TOKEN_PROC_STATE *pstTokenState);
153 /* static state info struct */
154 typedef struct __tag_TOKEN_PROC_STATE_INFO {
155 char *strStateName;
156 // int iStateId;
158 struct {
159 TOKEN_PROC pfnTokenStrProc;
160 const struct token_id *pstTokenTbl;
161 int iTblSize;
162 } astTokenTypeTbl[TYPE_MAX];
163 short *api16TokenNameLen[TYPE_MAX];
164 } TOKEN_PROC_STATE_INFO, *PTOKEN_PROC_STATE_INFO;
166 /* run-time state struct */
167 typedef struct __tag_TOKEN_PROC_STATE {
168 int iStateId;
169 TOKEN_PROC_STATE_INFO *pstStateInfo;
171 /* resolv info */
172 char *token_str;
173 int token_len;
174 int nl_cnt;
175 int recent_token;
176 TOKEN_ID_PROC pfnTokenProc;
178 char *state_token_str;
179 TOKEN_ID_PROC pfnPairTokenProc;
180 int paired_token;
181 int paired_state;
184 * this buffer is used for state txt (cmnt/string/codeblock).
185 * it stores txt beyond token.
186 * eg: a string contains envar/subscript/arithexpr,
187 * stTxtBuff stores the format string for sprintf().
189 STR_BUFF stTxtBuff;
191 int tmp1;
192 int tmp2;
193 int tmp3;
195 /* statement token info */
196 int iTokenIdNum;
197 uint32 u32Flag;
198 uint32 u32CodeBlockLvl;
199 } TOKEN_PROC_STATE, *PTOKEN_PROC_STATE;
/*
 * Forward declaration of the file-type descriptor; its struct body appears
 * further down in this header.
 *
 * PARSER_DESC is not re-declared here: its forward typedef already exists
 * near the top of this header (next to TOKEN_ID_PROC), and repeating a
 * typedef name is only permitted from C11 onwards.
 */
typedef struct __tag_FILE_TYPE FILE_TYPE;
204 enum {
205 SRC_STDIN,
206 SRC_FILE,
207 SRC_BUFFER,
210 typedef struct __tag_SRC_FILE_DESC {
211 struct __tag_SRC_FILE_DESC *upper;
213 int iSrcType;
215 /* input buffer */
216 STR_BUFF stTxtBuff;
218 /* file */
219 char *filename;
220 FILE *file;
222 int lineno;
224 /* for getchar() buffering */
225 int iBlkSize;
226 int iCurrIdx;
227 char aBuff[2][512*8]; // TBD: use ptr. if src is a buffer, it can reduce mem cost.
229 struct __tag_PARSER_DESC *pstParser;
230 FILE_TYPE *pstFileType;
231 } SRC_FILE_DESC, *PSRC_FILE_DESC;
234 * one parser for one script-txt.
235 * the script-txt src can be:
236 * @ src file
237 * @ buffer
238 * @ pipe(it's also a pipe)
240 typedef struct __tag_PARSER_DESC {
241 /* current info */
242 TOKEN_PROC_STATE *pstCurrTokenState;
243 TOKEN_PROC_STATE_INFO *pstStateInfo;
245 /* src file info */
246 SRC_FILE_DESC *pstCurrSrcFile;
248 /* corresponding token tbl for src file */
249 TOKEN_PROC_STATE_INFO *pstStateInfoTbl;
250 int iStateInfoNum;
251 const struct token_id *pstMTokenTbl;
252 int iMTokenNum;
253 unsigned short *pu16MTokenNameLen;
254 TOKEN_PROC pfnMTokenStrProc;
257 * @ stTxtBuff: store stTxtBuff in last sub state.
258 * when a token is returned, this buffer is the corresponding data.
259 * it will be used in cmnt/codeblock/string state, when state recovery,
260 * @ strbuff: it's a global buffer for parser, it will be freed before every token parsing.
262 STR_BUFF stTxtBuff;
263 STR_BUFF stCmntTxtBuff;
265 /* one type of token string processing */
266 char aTokenStrBuff[512];
267 int iTokenStrLen;
268 int iPrevIdx;
270 /* token string processing info */
271 char bakchar;
272 int lasttype;
273 int type;
275 QSTACK stStateStk;
276 // char *pStkBuff;
277 QSTACK stTokenQ;
278 } PARSER_DESC, *PPARSER_DESC;
281 typedef struct __tag_FILE_TYPE {
282 struct __tag_FILE_TYPE *next;
284 char strFileTypeName[32];
285 int iRefCnt;
287 /* different file type use different token defination */
288 TOKEN_PROC_STATE_INFO *pstStateInfoTbl;
289 int iStateInfoNum;
290 const struct token_id *pstMTokenTbl;
291 unsigned short *pu16MTokenNameLen;
292 } FILE_TYPE, * PFILE_TYPE;
296 /* Public Variables and Functions */
297 extern TOKEN_PROC_STATE *state_alloc_init (PARSER_DESC *pfiledesc, int iState);
299 extern PARSER_DESC *gparser;
302 /* Source Format Type */
303 extern FILE_TYPE * SrcFmtType_Append (
304 char *strTypeName,
305 TOKEN_PROC_STATE_INFO astTokenStateTbl[],
306 int num,
307 const struct token_id *pstMTokenTbl,
308 unsigned short *pu16MTokenNameLen
/*
 * NOTE(review): presumably removes the source format type registered under
 * `strTypeName` and reports success -- inferred from the name; confirm
 * against the implementation.
 */
extern bool SrcFmtType_Remove(char *strTypeName);
313 * Input Source
315 extern void IptSrc_Close (SRC_FILE_DESC *pstSrcFile);
316 extern SRC_FILE_DESC *IptSrc_Alloc (PARSER_DESC *pstParser);
317 extern SRC_FILE_DESC *IptSrc_InitInParser (PARSER_DESC *pstParser, char * pstrFileType);
318 extern SRC_FILE_DESC *IptSrc_FileOpen (
319 PARSER_DESC *pstParser,
320 char *filename,
321 char * pstrFileType);
322 extern SRC_FILE_DESC *IptSrc_StdinOpen (PARSER_DESC *pstParser, char * pstrFileType);
323 extern SRC_FILE_DESC *IptSrc_BuffOpen (
324 PARSER_DESC *pstParser,
325 STR_BUFF *buff,
326 char * pstrFileType);
328 /* Parser */
329 extern bool Parser_CtxFree (PARSER_DESC *pstParserDesc);
330 extern PARSER_DESC *Parser_CtxAlloc (void);
331 extern PARSER_DESC *Parser_AllocWithFile (char *filename, char *srctype);
332 extern bool Parser_CurrSrcClose (PARSER_DESC *pstParserDesc);
335 #endif /* __STRTOKEN_H__ */
374 #if 0
376 typedef struct __tag_SRC_FILE_DESC {
377 char *filename;
378 FILE *file;
379 int lineno;
381 /* for getchar() buffering */
382 int iBlkSize;
383 int iCurrIdx;
384 char aBuff[2][512*8];
386 TOKEN_PROC_STATE *pstCurrTokenState;
387 TOKEN_PROC_STATE_INFO *pstStateInfo;
388 TOKEN_PROC_STATE_INFO *pstStateInfoTbl;
389 int iStateInfoNum;
391 /* global token tbl for src file */
392 TOKEN_PROC pfnMTokenStrProc;
393 const struct token_id *pstMTokenTbl;
394 unsigned short *pu16MTokenNameLen;
396 /* token string processing info */
397 char bakchar;
398 int lasttype;
399 int type;
401 /* one type of token string processing */
402 char aTokenStrBuff[512];
403 int iTokenStrLen;
404 int iPrevIdx;
406 /* parsing state stack */
407 QSTACK stStateStk;
408 char *pStkBuff;
409 QSTACK stTokenQ;
410 } SRC_FILE_DESC, *PSRC_FILE_DESC;
412 #endif