Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / usr.bin / indent / lexi.c
blobf038448f62fceb9a93336361cef5f327bf966457
1 /* $NetBSD: lexi.c,v 1.12 2003/08/07 11:14:09 agc Exp $ */
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
33 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
34 * Copyright (c) 1985 Sun Microsystems, Inc.
35 * All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
66 #include <sys/cdefs.h>
67 #ifndef lint
68 #if 0
69 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
70 #else
71 __RCSID("$NetBSD: lexi.c,v 1.12 2003/08/07 11:14:09 agc Exp $");
72 #endif
73 #endif /* not lint */
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code indicating the type
 * of token scanned.
 */
81 #include <stdio.h>
82 #include <ctype.h>
83 #include <stdlib.h>
84 #include <string.h>
85 #include "indent_globs.h"
86 #include "indent_codes.h"
88 #define alphanum 1
89 #define opchar 3
/*
 * One entry of the reserved-word table.
 */
struct templ {
	const char *rwd;	/* spelling of the reserved word; a null
				 * pointer marks the end of the table */
	int     rwcode;		/* class code that lexi() switches on */
};

/*
 * Reserved words known to the scanner.  The class codes are interpreted by
 * the switch in lexi():
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	declaration keywords (types, storage classes, typedef)
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 *	other	treated like an ordinary identifier
 *
 * The table is oversized so that addkey() can append entries at run time;
 * the live part always ends with a {NULL, 0} terminator.
 */
struct templ specials[1000] =
{
	{"switch", 1},
	{"case", 2},
	{"break", 0},
	{"struct", 3},
	{"union", 3},
	{"enum", 3},
	{"default", 2},
	{"int", 4},
	{"char", 4},
	{"float", 4},
	{"double", 4},
	{"long", 4},
	{"short", 4},
	{"typedef", 4},		/* was misspelled "typdef", which never
				 * matched the real keyword */
	{"unsigned", 4},
	{"register", 4},
	{"static", 4},
	{"global", 4},
	{"extern", 4},
	{"void", 4},
	{"goto", 0},
	{"return", 0},
	{"if", 5},
	{"while", 5},
	{"for", 5},
	{"else", 6},
	{"do", 6},
	{"sizeof", 7},
	{NULL, 0}
};
/*
 * Character classification table, indexed by 7-bit ASCII code.  It is used
 * to decide what type each character is:
 *	1	alphanumeric (letters, digits, '_' and '$')
 *	3	operator character
 *	0	anything else
 */
char    chartype[128] =
{
	/* 0x00 - 0x0f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2f: space through slash */
	0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
	/* 0x30 - 0x3f: digits, then colon through question mark */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
	/* 0x40 - 0x4f: at-sign, A through O */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5f: P through Z, brackets, caret, underscore */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
	/* 0x60 - 0x6f: backquote, a through o */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7f: p through z, braces, bar, tilde, DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
155 lexi(void)
157 int unary_delim; /* this is set to 1 if the current token
159 * forces a following operator to be unary */
160 static int last_code; /* the last token type returned */
161 static int l_struct; /* set to 1 if the last token was 'struct' */
162 int code; /* internal code to be returned */
163 char qchar; /* the delimiter character for a string */
165 e_token = s_token; /* point to start of place to save token */
166 unary_delim = false;
167 ps.col_1 = ps.last_nl; /* tell world that this token started in
168 * column 1 iff the last thing scanned was nl */
169 ps.last_nl = false;
171 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
172 ps.col_1 = false; /* leading blanks imply token is not
173 * in column 1 */
174 if (++buf_ptr >= buf_end)
175 fill_buffer();
178 /* Scan an alphanumeric token */
179 if (chartype[(int) *buf_ptr] == alphanum ||
180 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
182 * we have a character or number
184 const char *j; /* used for searching thru list of
185 * reserved words */
186 struct templ *p;
188 if (isdigit((unsigned char)*buf_ptr) ||
189 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
190 int seendot = 0, seenexp = 0, seensfx = 0;
191 if (*buf_ptr == '0' &&
192 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
193 *e_token++ = *buf_ptr++;
194 *e_token++ = *buf_ptr++;
195 while (isxdigit((unsigned char)*buf_ptr)) {
196 CHECK_SIZE_TOKEN;
197 *e_token++ = *buf_ptr++;
199 } else {
200 while (1) {
201 if (*buf_ptr == '.') {
202 if (seendot)
203 break;
204 else
205 seendot++;
207 CHECK_SIZE_TOKEN;
208 *e_token++ = *buf_ptr++;
209 if (!isdigit((unsigned char)*buf_ptr)
210 && *buf_ptr != '.') {
211 if ((*buf_ptr != 'E'
212 && *buf_ptr != 'e') || seenexp)
213 break;
214 else {
215 seenexp++;
216 seendot++;
217 CHECK_SIZE_TOKEN;
218 *e_token++ = *buf_ptr++;
219 if (*buf_ptr == '+' || *buf_ptr == '-')
220 *e_token++ = *buf_ptr++;
225 if (*buf_ptr == 'F' || *buf_ptr == 'f') {
226 /* float constant */
227 *e_token++ = *buf_ptr++;
228 } else {
229 /* integer constant */
230 while (1) {
231 if (!(seensfx & 1) &&
232 (*buf_ptr == 'U' ||
233 *buf_ptr == 'u')) {
234 CHECK_SIZE_TOKEN;
235 *e_token++ = *buf_ptr++;
236 seensfx |= 1;
237 continue;
239 if (!(seensfx & 2) &&
240 (*buf_ptr == 'L' ||
241 *buf_ptr == 'l')) {
242 CHECK_SIZE_TOKEN;
243 if (buf_ptr[1] == buf_ptr[0])
244 *e_token++ = *buf_ptr++;
245 *e_token++ = *buf_ptr++;
246 seensfx |= 2;
247 continue;
249 break;
252 } else
253 while (chartype[(int) *buf_ptr] == alphanum) { /* copy it over */
254 CHECK_SIZE_TOKEN;
255 *e_token++ = *buf_ptr++;
256 if (buf_ptr >= buf_end)
257 fill_buffer();
259 *e_token++ = '\0';
260 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
261 if (++buf_ptr >= buf_end)
262 fill_buffer();
264 ps.its_a_keyword = false;
265 ps.sizeof_keyword = false;
266 if (l_struct) { /* if last token was 'struct', then this token
267 * should be treated as a declaration */
268 l_struct = false;
269 last_code = ident;
270 ps.last_u_d = true;
271 return (decl);
273 ps.last_u_d = false; /* Operator after indentifier is
274 * binary */
275 last_code = ident; /* Remember that this is the code we
276 * will return */
279 * This loop will check if the token is a keyword.
281 for (p = specials; (j = p->rwd) != 0; p++) {
282 char *pt = s_token; /* point at scanned token */
283 if (*j++ != *pt++ || *j++ != *pt++)
284 continue; /* This test depends on the
285 * fact that identifiers are
286 * always at least 1 character
287 * long (ie. the first two
288 * bytes of the identifier are
289 * always meaningful) */
290 if (pt[-1] == 0)
291 break; /* If its a one-character identifier */
292 while (*pt++ == *j)
293 if (*j++ == 0)
294 goto found_keyword; /* I wish that C had a
295 * multi-level break... */
297 if (p->rwd) { /* we have a keyword */
298 found_keyword:
299 ps.its_a_keyword = true;
300 ps.last_u_d = true;
301 switch (p->rwcode) {
302 case 1:/* it is a switch */
303 return (swstmt);
304 case 2:/* a case or default */
305 return (casestmt);
307 case 3:/* a "struct" */
308 if (ps.p_l_follow)
309 break; /* inside parens: cast */
310 l_struct = true;
313 * Next time around, we will want to know that we have had a
314 * 'struct'
316 case 4:/* one of the declaration keywords */
317 if (ps.p_l_follow) {
318 ps.cast_mask |= 1 << ps.p_l_follow;
319 break; /* inside parens: cast */
321 last_code = decl;
322 return (decl);
324 case 5:/* if, while, for */
325 return (sp_paren);
327 case 6:/* do, else */
328 return (sp_nparen);
330 case 7:
331 ps.sizeof_keyword = true;
332 default: /* all others are treated like any
333 * other identifier */
334 return (ident);
335 } /* end of switch */
336 } /* end of if (found_it) */
337 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
338 char *tp = buf_ptr;
339 while (tp < buf_end)
340 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
341 goto not_proc;
342 strncpy(ps.procname, token, sizeof ps.procname - 1);
343 ps.in_parameter_declaration = 1;
344 rparen_count = 1;
345 not_proc: ;
348 * The following hack attempts to guess whether or not the current
349 * token is in fact a declaration keyword -- one that has been
350 * typedefd
352 if (((*buf_ptr == '*' && buf_ptr[1] != '=') ||
353 isalpha((unsigned char)*buf_ptr) || *buf_ptr == '_')
354 && !ps.p_l_follow
355 && !ps.block_init
356 && (ps.last_token == rparen || ps.last_token == semicolon ||
357 ps.last_token == decl ||
358 ps.last_token == lbrace || ps.last_token == rbrace)) {
359 ps.its_a_keyword = true;
360 ps.last_u_d = true;
361 last_code = decl;
362 return decl;
364 if (last_code == decl) /* if this is a declared variable,
365 * then following sign is unary */
366 ps.last_u_d = true; /* will make "int a -1" work */
367 last_code = ident;
368 return (ident); /* the ident is not in the list */
369 } /* end of procesing for alpanum character */
370 /* Scan a non-alphanumeric token */
371 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
372 * moved here */
373 *e_token = '\0';
374 if (++buf_ptr >= buf_end)
375 fill_buffer();
377 switch (*token) {
378 case '\n':
379 unary_delim = ps.last_u_d;
380 ps.last_nl = true; /* remember that we just had a newline */
381 code = (had_eof ? 0 : newline);
384 * if data has been exausted, the newline is a dummy, and we should
385 * return code to stop
387 break;
389 case '\'': /* start of quoted character */
390 case '"': /* start of string */
391 qchar = *token;
392 if (troff) {
393 e_token[-1] = '`';
394 if (qchar == '"')
395 *e_token++ = '`';
396 e_token = chfont(&bodyf, &stringf, e_token);
398 do { /* copy the string */
399 while (1) { /* move one character or
400 * [/<char>]<char> */
401 if (*buf_ptr == '\n') {
402 printf("%d: Unterminated literal\n", line_no);
403 goto stop_lit;
405 CHECK_SIZE_TOKEN; /* Only have to do this
406 * once in this loop,
407 * since CHECK_SIZE
408 * guarantees that there
409 * are at least 5
410 * entries left */
411 *e_token = *buf_ptr++;
412 if (buf_ptr >= buf_end)
413 fill_buffer();
414 if (*e_token == BACKSLASH) { /* if escape, copy extra
415 * char */
416 if (*buf_ptr == '\n') /* check for escaped
417 * newline */
418 ++line_no;
419 if (troff) {
420 *++e_token = BACKSLASH;
421 if (*buf_ptr == BACKSLASH)
422 *++e_token = BACKSLASH;
424 *++e_token = *buf_ptr++;
425 ++e_token; /* we must increment
426 * this again because we
427 * copied two chars */
428 if (buf_ptr >= buf_end)
429 fill_buffer();
430 } else
431 break; /* we copied one character */
432 } /* end of while (1) */
433 } while (*e_token++ != qchar);
434 if (troff) {
435 e_token = chfont(&stringf, &bodyf, e_token - 1);
436 if (qchar == '"')
437 *e_token++ = '\'';
439 stop_lit:
440 code = ident;
441 break;
443 case ('('):
444 case ('['):
445 unary_delim = true;
446 code = lparen;
447 break;
449 case (')'):
450 case (']'):
451 code = rparen;
452 break;
454 case '#':
455 unary_delim = ps.last_u_d;
456 code = preesc;
457 break;
459 case '?':
460 unary_delim = true;
461 code = question;
462 break;
464 case (':'):
465 code = colon;
466 unary_delim = true;
467 break;
469 case (';'):
470 unary_delim = true;
471 code = semicolon;
472 break;
474 case ('{'):
475 unary_delim = true;
478 * if (ps.in_or_st) ps.block_init = 1;
480 /* ? code = ps.block_init ? lparen : lbrace; */
481 code = lbrace;
482 break;
484 case ('}'):
485 unary_delim = true;
486 /* ? code = ps.block_init ? rparen : rbrace; */
487 code = rbrace;
488 break;
490 case 014: /* a form feed */
491 unary_delim = ps.last_u_d;
492 ps.last_nl = true; /* remember this so we can set
493 * 'ps.col_1' right */
494 code = form_feed;
495 break;
497 case (','):
498 unary_delim = true;
499 code = comma;
500 break;
502 case '.':
503 unary_delim = false;
504 code = period;
505 break;
507 case '-':
508 case '+': /* check for -, +, --, ++ */
509 code = (ps.last_u_d ? unary_op : binary_op);
510 unary_delim = true;
512 if (*buf_ptr == token[0]) {
513 /* check for doubled character */
514 *e_token++ = *buf_ptr++;
515 /* buffer overflow will be checked at end of loop */
516 if (last_code == ident || last_code == rparen) {
517 code = (ps.last_u_d ? unary_op : postop);
518 /* check for following ++ or -- */
519 unary_delim = false;
521 } else
522 if (*buf_ptr == '=')
523 /* check for operator += */
524 *e_token++ = *buf_ptr++;
525 else
526 if (*buf_ptr == '>') {
527 /* check for operator -> */
528 *e_token++ = *buf_ptr++;
529 if (!pointer_as_binop) {
530 unary_delim = false;
531 code = unary_op;
532 ps.want_blank = false;
535 break; /* buffer overflow will be checked at end of
536 * switch */
538 case '=':
539 if (ps.in_or_st)
540 ps.block_init = 1;
541 #ifdef undef
542 if (chartype[*buf_ptr] == opchar) { /* we have two char
543 * assignment */
544 e_token[-1] = *buf_ptr++;
545 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
546 *e_token++ = *buf_ptr++;
547 *e_token++ = '='; /* Flip =+ to += */
548 *e_token = 0;
550 #else
551 if (*buf_ptr == '=') { /* == */
552 *e_token++ = '='; /* Flip =+ to += */
553 buf_ptr++;
554 *e_token = 0;
556 #endif
557 code = binary_op;
558 unary_delim = true;
559 break;
560 /* can drop thru!!! */
562 case '>':
563 case '<':
564 case '!': /* ops like <, <<, <=, !=, etc */
565 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
566 *e_token++ = *buf_ptr;
567 if (++buf_ptr >= buf_end)
568 fill_buffer();
570 if (*buf_ptr == '=')
571 *e_token++ = *buf_ptr++;
572 code = (ps.last_u_d ? unary_op : binary_op);
573 unary_delim = true;
574 break;
576 default:
577 if (token[0] == '/' && *buf_ptr == '*') {
578 /* it is start of comment */
579 *e_token++ = '*';
581 if (++buf_ptr >= buf_end)
582 fill_buffer();
584 code = comment;
585 unary_delim = ps.last_u_d;
586 break;
588 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
590 * handle ||, &&, etc, and also things as in int *****i
592 *e_token++ = *buf_ptr;
593 if (++buf_ptr >= buf_end)
594 fill_buffer();
596 code = (ps.last_u_d ? unary_op : binary_op);
597 unary_delim = true;
600 } /* end of switch */
601 if (code != newline) {
602 l_struct = false;
603 last_code = code;
605 if (buf_ptr >= buf_end) /* check for input buffer empty */
606 fill_buffer();
607 ps.last_u_d = unary_delim;
608 *e_token = '\0'; /* null terminate the token */
609 return (code);
612 * Add the given keyword to the keyword table, using val as the keyword type
614 void
615 addkey(char *key, int val)
617 struct templ *p = specials;
618 while (p->rwd)
619 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
620 return;
621 else
622 p++;
623 if (p >= specials + sizeof specials / sizeof specials[0])
624 return; /* For now, table overflows are silently
625 * ignored */
626 p->rwd = key;
627 p->rwcode = val;
628 p[1].rwd = 0;
629 p[1].rwcode = 0;