1 /* $NetBSD: lexi.c,v 1.12 2003/08/07 11:14:09 agc Exp $ */
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
34 * Copyright (c) 1985 Sun Microsystems, Inc.
35 * All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 #include <sys/cdefs.h>
69 static char sccsid
[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
71 __RCSID("$NetBSD: lexi.c,v 1.12 2003/08/07 11:14:09 agc Exp $");
76 * Here we have the token scanner for indent. It scans off one token and puts
77 * it in the global variable "token". It returns a code, indicating the type
85 #include "indent_globs.h"
86 #include "indent_codes.h"
96 struct templ specials
[1000] =
130 { /* this is used to facilitate the decision of
131 * what type (alphanumeric, operator) each
133 0, 0, 0, 0, 0, 0, 0, 0,
134 0, 0, 0, 0, 0, 0, 0, 0,
135 0, 0, 0, 0, 0, 0, 0, 0,
136 0, 0, 0, 0, 0, 0, 0, 0,
137 0, 3, 0, 0, 1, 3, 3, 0,
138 0, 0, 3, 3, 0, 3, 0, 3,
139 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 0, 0, 3, 3, 3, 3,
141 0, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 0, 0, 0, 3, 1,
145 0, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 0, 3, 0, 3, 0
157 int unary_delim
; /* this is set to 1 if the current token
159 * forces a following operator to be unary */
160 static int last_code
; /* the last token type returned */
161 static int l_struct
; /* set to 1 if the last token was 'struct' */
162 int code
; /* internal code to be returned */
163 char qchar
; /* the delimiter character for a string */
165 e_token
= s_token
; /* point to start of place to save token */
167 ps
.col_1
= ps
.last_nl
; /* tell world that this token started in
168 * column 1 iff the last thing scanned was nl */
171 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
172 ps
.col_1
= false; /* leading blanks imply token is not
174 if (++buf_ptr
>= buf_end
)
178 /* Scan an alphanumeric token */
179 if (chartype
[(int) *buf_ptr
] == alphanum
||
180 (buf_ptr
[0] == '.' && isdigit((unsigned char)buf_ptr
[1]))) {
182 * we have a character or number
184 const char *j
; /* used for searching thru list of
188 if (isdigit((unsigned char)*buf_ptr
) ||
189 (buf_ptr
[0] == '.' && isdigit((unsigned char)buf_ptr
[1]))) {
190 int seendot
= 0, seenexp
= 0, seensfx
= 0;
191 if (*buf_ptr
== '0' &&
192 (buf_ptr
[1] == 'x' || buf_ptr
[1] == 'X')) {
193 *e_token
++ = *buf_ptr
++;
194 *e_token
++ = *buf_ptr
++;
195 while (isxdigit((unsigned char)*buf_ptr
)) {
197 *e_token
++ = *buf_ptr
++;
201 if (*buf_ptr
== '.') {
208 *e_token
++ = *buf_ptr
++;
209 if (!isdigit((unsigned char)*buf_ptr
)
210 && *buf_ptr
!= '.') {
212 && *buf_ptr
!= 'e') || seenexp
)
218 *e_token
++ = *buf_ptr
++;
219 if (*buf_ptr
== '+' || *buf_ptr
== '-')
220 *e_token
++ = *buf_ptr
++;
225 if (*buf_ptr
== 'F' || *buf_ptr
== 'f') {
227 *e_token
++ = *buf_ptr
++;
229 /* integer constant */
231 if (!(seensfx
& 1) &&
235 *e_token
++ = *buf_ptr
++;
239 if (!(seensfx
& 2) &&
243 if (buf_ptr
[1] == buf_ptr
[0])
244 *e_token
++ = *buf_ptr
++;
245 *e_token
++ = *buf_ptr
++;
253 while (chartype
[(int) *buf_ptr
] == alphanum
) { /* copy it over */
255 *e_token
++ = *buf_ptr
++;
256 if (buf_ptr
>= buf_end
)
260 while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
261 if (++buf_ptr
>= buf_end
)
264 ps
.its_a_keyword
= false;
265 ps
.sizeof_keyword
= false;
266 if (l_struct
) { /* if last token was 'struct', then this token
267 * should be treated as a declaration */
273 ps
.last_u_d
= false; /* Operator after identifier is
275 last_code
= ident
; /* Remember that this is the code we
279 * This loop will check if the token is a keyword.
281 for (p
= specials
; (j
= p
->rwd
) != 0; p
++) {
282 char *pt
= s_token
; /* point at scanned token */
283 if (*j
++ != *pt
++ || *j
++ != *pt
++)
284 continue; /* This test depends on the
285 * fact that identifiers are
286 * always at least 1 character
287 * long (ie. the first two
288 * bytes of the identifier are
289 * always meaningful) */
291 break; /* If it's a one-character identifier */
294 goto found_keyword
; /* I wish that C had a
295 * multi-level break... */
297 if (p
->rwd
) { /* we have a keyword */
299 ps
.its_a_keyword
= true;
302 case 1:/* it is a switch */
304 case 2:/* a case or default */
307 case 3:/* a "struct" */
309 break; /* inside parens: cast */
313 * Next time around, we will want to know that we have had a
316 case 4:/* one of the declaration keywords */
318 ps
.cast_mask
|= 1 << ps
.p_l_follow
;
319 break; /* inside parens: cast */
324 case 5:/* if, while, for */
327 case 6:/* do, else */
331 ps
.sizeof_keyword
= true;
332 default: /* all others are treated like any
333 * other identifier */
335 } /* end of switch */
336 } /* end of if (found_it) */
337 if (*buf_ptr
== '(' && ps
.tos
<= 1 && ps
.ind_level
== 0) {
340 if (*tp
++ == ')' && (*tp
== ';' || *tp
== ','))
342 strncpy(ps
.procname
, token
, sizeof ps
.procname
- 1);
343 ps
.in_parameter_declaration
= 1;
348 * The following hack attempts to guess whether or not the current
349 * token is in fact a declaration keyword -- one that has been
352 if (((*buf_ptr
== '*' && buf_ptr
[1] != '=') ||
353 isalpha((unsigned char)*buf_ptr
) || *buf_ptr
== '_')
356 && (ps
.last_token
== rparen
|| ps
.last_token
== semicolon
||
357 ps
.last_token
== decl
||
358 ps
.last_token
== lbrace
|| ps
.last_token
== rbrace
)) {
359 ps
.its_a_keyword
= true;
364 if (last_code
== decl
) /* if this is a declared variable,
365 * then following sign is unary */
366 ps
.last_u_d
= true; /* will make "int a -1" work */
368 return (ident
); /* the ident is not in the list */
369 } /* end of processing for alphanum character */
370 /* Scan a non-alphanumeric token */
371 *e_token
++ = *buf_ptr
; /* if it is only a one-character token, it is
374 if (++buf_ptr
>= buf_end
)
379 unary_delim
= ps
.last_u_d
;
380 ps
.last_nl
= true; /* remember that we just had a newline */
381 code
= (had_eof
? 0 : newline
);
384 * if data has been exhausted, the newline is a dummy, and we should
385 * return code to stop
389 case '\'': /* start of quoted character */
390 case '"': /* start of string */
396 e_token
= chfont(&bodyf
, &stringf
, e_token
);
398 do { /* copy the string */
399 while (1) { /* move one character or
401 if (*buf_ptr
== '\n') {
402 printf("%d: Unterminated literal\n", line_no
);
405 CHECK_SIZE_TOKEN
; /* Only have to do this
408 * guarantees that there
411 *e_token
= *buf_ptr
++;
412 if (buf_ptr
>= buf_end
)
414 if (*e_token
== BACKSLASH
) { /* if escape, copy extra
416 if (*buf_ptr
== '\n') /* check for escaped
420 *++e_token
= BACKSLASH
;
421 if (*buf_ptr
== BACKSLASH
)
422 *++e_token
= BACKSLASH
;
424 *++e_token
= *buf_ptr
++;
425 ++e_token
; /* we must increment
426 * this again because we
427 * copied two chars */
428 if (buf_ptr
>= buf_end
)
431 break; /* we copied one character */
432 } /* end of while (1) */
433 } while (*e_token
++ != qchar
);
435 e_token
= chfont(&stringf
, &bodyf
, e_token
- 1);
455 unary_delim
= ps
.last_u_d
;
478 * if (ps.in_or_st) ps.block_init = 1;
480 /* ? code = ps.block_init ? lparen : lbrace; */
486 /* ? code = ps.block_init ? rparen : rbrace; */
490 case 014: /* a form feed */
491 unary_delim
= ps
.last_u_d
;
492 ps
.last_nl
= true; /* remember this so we can set
493 * 'ps.col_1' right */
508 case '+': /* check for -, +, --, ++ */
509 code
= (ps
.last_u_d
? unary_op
: binary_op
);
512 if (*buf_ptr
== token
[0]) {
513 /* check for doubled character */
514 *e_token
++ = *buf_ptr
++;
515 /* buffer overflow will be checked at end of loop */
516 if (last_code
== ident
|| last_code
== rparen
) {
517 code
= (ps
.last_u_d
? unary_op
: postop
);
518 /* check for following ++ or -- */
523 /* check for operator += */
524 *e_token
++ = *buf_ptr
++;
526 if (*buf_ptr
== '>') {
527 /* check for operator -> */
528 *e_token
++ = *buf_ptr
++;
529 if (!pointer_as_binop
) {
532 ps
.want_blank
= false;
535 break; /* buffer overflow will be checked at end of
542 if (chartype
[*buf_ptr
] == opchar
) { /* we have two char
544 e_token
[-1] = *buf_ptr
++;
545 if ((e_token
[-1] == '<' || e_token
[-1] == '>') && e_token
[-1] == *buf_ptr
)
546 *e_token
++ = *buf_ptr
++;
547 *e_token
++ = '='; /* Flip =+ to += */
551 if (*buf_ptr
== '=') { /* == */
552 *e_token
= '='; /* append the second '=' of "==" */
560 /* can drop thru!!! */
564 case '!': /* ops like <, <<, <=, !=, etc */
565 if (*buf_ptr
== '>' || *buf_ptr
== '<' || *buf_ptr
== '=') {
566 *e_token
++ = *buf_ptr
;
567 if (++buf_ptr
>= buf_end
)
571 *e_token
++ = *buf_ptr
++;
572 code
= (ps
.last_u_d
? unary_op
: binary_op
);
577 if (token
[0] == '/' && *buf_ptr
== '*') {
578 /* it is start of comment */
581 if (++buf_ptr
>= buf_end
)
585 unary_delim
= ps
.last_u_d
;
588 while (*(e_token
- 1) == *buf_ptr
|| *buf_ptr
== '=') {
590 * handle ||, &&, etc, and also things as in int *****i
592 *e_token
++ = *buf_ptr
;
593 if (++buf_ptr
>= buf_end
)
596 code
= (ps
.last_u_d
? unary_op
: binary_op
);
600 } /* end of switch */
601 if (code
!= newline
) {
605 if (buf_ptr
>= buf_end
) /* check for input buffer empty */
607 ps
.last_u_d
= unary_delim
;
608 *e_token
= '\0'; /* null terminate the token */
612 * Add the given keyword to the keyword table, using val as the keyword type
615 addkey(char *key
, int val
)
617 struct templ
*p
= specials
;
619 if (p
->rwd
[0] == key
[0] && strcmp(p
->rwd
, key
) == 0)
623 if (p
>= specials
+ sizeof specials
/ sizeof specials
[0])
624 return; /* For now, table overflows are silently