4 * Copyright (C) 1989-2021 Alan R. Baldwin
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 * 28-Oct-97 JLH bug in getst(): sign extend on ~(SPACE|ILL)
28 * causes infinite loop
32 * Extensions: P. Felber, M. Hope
35 #include "dbuf_string.h"
40 * The module aslex.c includes the general lexical
41 * analysis routines for the assembler.
43 * aslex.c contains the following functions:
61 * aslex.c contains no local/static variables
64 /*)Function VOID getid(id,c)
66 * char * id a pointer to a string of
67 * maximum length NCPS-1
69 * >=0 this is first character to
70 * copy to the string buffer
71 * <0 skip white space, first
72 * character must be a LETTER
74 * The function getid() scans the current assembler-source text line
75 * from the current position copying the next LETTER | DIGIT string
76 * into the external string buffer id[]. The string ends when a non
77 * LETTER or DIGIT character is found. The maximum number of characters
78 * copied is NCPS-1. If the input string is larger than NCPS-1
79 * characters then the string is truncated. The string is always
80 * NULL terminated. If the mode argument (c) is >=0 then (c) is
81 * the first character copied to the string buffer, if (c) is <0
82 * then intervening white space (SPACES and TABS) are skipped and
83 * the first character found must be a LETTER else a 'q' error
84 * terminates the parse of this assembler-source text line.
87 * char * p pointer to external string buffer
88 * int c current character value
91 * char ctype[] a character array which defines the
92 * type of character being processed.
93 * This index is the character
99 * VOID unget() aslex.c
100 * VOID qerr() assubr.c
103 * Use of getnb(), get(), and unget() updates the
104 * global pointer ip, the position in the current
105 * assembler-source text line.
109 getid(char *id
, int c
)
115 if ((ctype
[c
] & LETTER
) == 0)
122 } while (ctype
[c
=get()] & (LETTER
|DIGIT
));
127 /*)Function VOID getst(id,c)
129 * char * id a pointer to a string of
130 * maximum length NCPS-1
132 * >=0 this is first character to
133 * copy to the string buffer
134 * <0 skip white space, first
135 * character must be a LETTER
137 * The function getst() scans the current assembler-source text line
138 * from the current position copying the next character string into
139 * the external string buffer (id). The string ends when a SPACE or
140 * ILL character is found. The maximum number of characters copied is
141 * NCPS-1. If the input string is larger than NCPS-1 characters then
142 * the string is truncated. The string is always NULL terminated.
143 * If the mode argument (c) is >=0 then (c) is the first character
144 * copied to the string buffer, if (c) is <0 then intervening white
145 * space (SPACES and TABS) are skipped and the first character found
146 * must be a LETTER else a 'q' error terminates the parse of this
147 * assembler-source text line.
150 * char * p pointer to external string buffer
151 * int c current character value
154 * char ctype[] a character array which defines the
155 * type of character being processed.
156 * This index is the character
161 * int getnb() aslex.c
162 * VOID unget() aslex.c
163 * VOID qerr() assubr.c
166 * use of getnb(), get(), and unget() updates the
167 * global pointer ip, the position in the current
168 * assembler-source text line.
172 getst(char *id
, int c
)
178 if ((ctype
[c
] & LETTER
) == 0)
185 } while (ctype
[c
=get()] & ~(SPACE
|ILL
) & 0xFF);
190 /*)Function int getdstr(str, slen)
192 * char * str character array to return string in
193 * int slen character array length
195 * The function getdstr() returns the character string
196 * within delimiters. If no delimiting character
197 * is found a 'q' error is generated.
200 * int c current character from
201 * assembler-source text line
202 * int d the delimiting character
209 * int getdlm() aslex.c
210 * VOID qerr() assubr.c
213 * Returns the character string delimited by the
214 * character returned from getdlm(). SPACEs and
215 * TABs before the delimited string are skipped.
216 * A 'q' error is generated if no delimited string
217 * is found or the input line terminates unexpectedly.
221 getdstr(char *str
, int slen
)
229 while ((c
= get()) != d
) {
233 if (p
< &str
[slen
-1]) {
242 /*)Function int getdlm()
244 * The function getdlm() returns the delimiter character
245 * or if the end of the line is encountered a 'q' error
249 * int c current character from
250 * assembler-source text line
257 * int getnb() aslex.c
259 * VOID qerr() assubr.c
262 * scans ip to the first non 'SPACE' or 'TAB' character
263 * and returns that character or the first character
264 * following a ^ character as the delimiting character.
265 * The end of the text line or the beginning of a
266 * comment causes a 'q' error.
275 if ((c
= getnb()) == '^') {
287 /*)Function int getnb()
289 * The function getnb() scans the current assembler-source
290 * text line returning the first character not a SPACE or TAB.
293 * int c current character from
294 * assembler-source text line
303 * use of get() updates the global pointer ip, the position
304 * in the current assembler-source text line
312 while ((c
=get()) == ' ' || c
== '\t')
317 /*)Function int get()
319 * The function get() returns the next character in the
320 * assembler-source text line, at the end of the line a
321 * NULL character is returned.
324 * int c current character from
325 * assembler-source text line
328 * char * ip pointer into the current
329 * assembler-source text line
335 * updates ip to the next character position in the
336 * assembler-source text line. If ip is at the end of the
337 * line, ip is not updated.
350 /*)Function VOID unget(c)
352 * int c value of last character read from
353 * assembler-source text line
355 * If (c) is not a NULL character then the global pointer ip
356 * is updated to point to the preceding character in the
357 * assembler-source text line.
359 * NOTE: This function does not push the character (c)
360 * back into the assembler-source text line, only
361 * the pointer ip is changed.
364 * int c last character read from
365 * assembler-source text line
368 * char * ip position into the current
369 * assembler-source text line
375 * ip decremented by 1 character position
386 /*)Function int getmap(d)
388 * int d value to compare with the
389 * assembler-source text line character
391 * The function getmap() converts the 'C' style characters \b, \f,
392 * \n, \r, and \t to their equivalent ascii values and also
393 * converts 'C' style octal constants '\123' to their equivalent
394 * numeric values. If the first character is equivalent to (d) then
395 * a (-1) is returned, if the end of the line is detected then
396 * a 'q' error terminates the parse for this line, or if the first
397 * character is not a \ then the character value is returned.
400 * int c value of character from the
401 * assembler-source text line
402 * int n looping counter
403 * int v current value of numeric conversion
410 * VOID qerr() assubr.c
413 * use of get() updates the global pointer ip the position
414 * in the current assembler-source text line
422 if ((c
=get()) == '\0')
460 while (++n
<=3 && c
>='0' && c
<='7') {
461 v
= (v
<<3) + c
- '0';
477 /*)Function int comma(flag)
479 * int flag when flag is non zero a 'q' error is
480 * generated if a COMMA is not found.
482 * The function comma() skips SPACEs and TABs and returns
483 * a '1' if the next character is a COMMA else a '0' is
484 * returned. If a COMMA is not found and flag is non zero
485 * then a 'q' error is reported.
488 * int c last character read from
489 * assembler-source text line
495 * int getnb() aslex.c
496 * VOID unget() aslex.c
497 * VOID xerr() assubr.c
500 * assembler-source text line pointer updated
508 if ((c
= getnb()) != ',') {
510 xerr('q', "Expected a ','.");
519 /*)Function int nxtline()
521 * The function nxtline() reads a line of assembler-source text
522 * from an assembly source text file, include file, or macro.
523 * Lines of text are processed from assembler-source files until
524 * all files have been read. If an include file is opened then
525 * lines of text are read from the include file (or nested
526 * include file) until the end of the include file is found.
527 * The input text line is transferred into the global string
528 * ib[] and converted to a NULL terminated string. The string
529 * is then copied into the global string ic[] which is used
530 * for internal processing by the assembler. The function
531 * nxtline() returns a (1) after successfully reading
532 * a line, or a (0) if all files have been read.
535 * int len string length
536 * struct asmf *asmt temporary pointer to the processing structure
539 * char afn[] afile() constructed filespec
540 * int afp afile constructed path length
541 * asmf * asmc pointer to current assembler file structure
542 * asmf * asmi pointer to a queued include file structure
543 * asmf * asmq pointer to a queued macro structure
544 * char * ib string buffer containing
545 * assembler-source text line for processing
546 * char * ic string buffer containing
547 * assembler-source text line for listing
548 * int asmline source file line number
549 * int incfil current include file count
550 * int incline include file line number
551 * int lnlist LIST-NLIST state
552 * int mcrline macro line number
553 * int srcline current source line number
554 * int uflag -u, disable .list/.nlist processing
558 * int dbuf_set_length()
560 * const char * dbuf_c_str()
561 * int dbuf_append_str()
562 * VOID chopcrlf() aslex.c
563 * int fclose() c_library
564 * char * fgetm() asmcro.c
565 * VOID scanline() aslex.c
566 * char * strcpy() c_library
569 * include file will be closed at detection of end of file.
570 * the next sequential source file may be selected.
571 * The current file specification afn[] and the path
572 * length afp may be changed.
573 * The respective line counter will be updated.
575 * --------------------------------------------------------------
577 * How the assembler sequences the command line assembler
578 * source files, include files, and macros is shown in a
579 * simplified manner in the following.
581 * main[asmain] sequences the command line files by creating
582 * a linked list of asmf structures, one for each file.
585 * ------------- ------------- -------------
586 * | File 1 | | File 2 | | File N |
587 * ------ | ------| | ------| | ------|
588 * | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
589 * ------ ------------- ------------- -------------
591 * At the beginning of each assembler pass set asmc = asmp
592 * and process the files in sequence.
594 * If the source file invokes the .include directive to process a
595 * file then a new asmf structure is prepended to the asmc structure
596 * currently being processed. At the end of the include file the
597 * processing resumes at the point the asmc structure was interrupted.
598 * This is shown in the following:
608 * ------------- ------------- -------------
609 * | File 1 | | File 2 | | File N |
610 * ------ | ------| | ------| | ------|
611 * | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
612 * ------ ------------- ------------- -------------
614 * At the .include point link the asmi structure to asmc
615 * and then set asmc = asmi (the include file asmf structure).
617 * If a source file invokes a macro then a new asmf structure is
618 * prepended to the asmc structure currently being processed. At the
619 * end of the macro the processing resumes at the point the asmc
620 * structure was interrupted.
621 * This is shown in the following:
623 * ------------- -------------
624 * | Incl File 1 | | Macro |
625 * | ------| | ------|
626 * | | next | | | next |
627 * ------------- -------------
629 * asmf structures: | |
631 * ------------- ------------- -------------
632 * | File 1 | | File 2 | | File N |
633 * ------ | ------| | ------| | ------|
634 * | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
635 * ------ ------------- ------------- -------------
637 * At the macro point link the asmq structure to asmc
638 * and then set asmc = asmq (the macro asmf structure).
640 * Note that both include files and macros can be nested.
641 * Macros may be invoked within include files and include
642 * files can be invoked within macros.
644 * Include files are opened, read, and closed on each pass
647 * Macros are recreated during each pass of the assembler.
654 static struct dbuf_s dbuf_ib
;
655 static struct dbuf_s dbuf_ic
;
659 if (!dbuf_is_initialized (&dbuf_ib
))
660 dbuf_init (&dbuf_ib
, 1024);
661 if (!dbuf_is_initialized (&dbuf_ic
))
662 dbuf_init (&dbuf_ic
, 1024);
663 dbuf_set_length (&dbuf_ib
, 0);
664 dbuf_set_length (&dbuf_ic
, 0);
666 loop
: if (asmc
== NULL
) return(0);
669 * Insert Include File
677 * Insert Queued Macro
685 switch(asmc
->objtyp
) {
687 if ((len
= dbuf_getline (&dbuf_ib
, asmc
->fp
)) == 0) {
688 if ((asmc
->flevel
!= flevel
) || (asmc
->tlevel
!= tlevel
)) {
690 fprintf(stderr
, "?ASxxxx-Error-<i> at end of assembler file\n");
691 fprintf(stderr
, " %s\n", geterr('i'));
693 flevel
= asmc
->flevel
;
694 tlevel
= asmc
->tlevel
;
695 lnlist
= asmc
->lnlist
;
700 if ((lnlist
& LIST_PAG
) || (uflag
== 1)) {
705 if (asmline
++ == 0) {
706 strcpy(afn
, asmc
->afn
);
714 if ((len
= dbuf_getline (&dbuf_ib
, asmc
->fp
)) == 0) {
717 if ((asmc
->flevel
!= flevel
) || (asmc
->tlevel
!= tlevel
)) {
719 fprintf(stderr
, "?ASxxxx-Error-<i> at end of include file\n");
720 fprintf(stderr
, " %s\n", geterr('i'));
722 srcline
= asmc
->line
;
723 flevel
= asmc
->flevel
;
724 tlevel
= asmc
->tlevel
;
725 lnlist
= asmc
->lnlist
;
727 switch (asmc
->objtyp
) {
729 case T_ASM
: asmline
= srcline
; break;
730 case T_INCL
: incline
= srcline
; break;
731 case T_MACRO
: mcrline
= srcline
; break;
734 * Scan for parent file
737 while (asmt
!= NULL
) {
738 if (asmt
->objtyp
!= T_MACRO
) {
739 strcpy(afn
, asmt
->afn
);
745 if ((lnlist
& LIST_PAG
) || (uflag
== 1)) {
750 if (incline
++ == 0) {
751 strcpy(afn
, asmc
->afn
);
759 dbuf_append(&dbuf_ib
, "\0", dbuf_ib
.alloc
- 1);
760 ib
= (char *)dbuf_c_str (&dbuf_ib
);
761 ib
= fgetm(ib
, dbuf_ib
.alloc
- 1, asmc
->fp
);
763 dbuf_set_length(&dbuf_ib
, 0);
765 srcline
= asmc
->line
;
766 flevel
= asmc
->flevel
;
767 tlevel
= asmc
->tlevel
;
768 lnlist
= asmc
->lnlist
;
770 switch (asmc
->objtyp
) {
772 case T_ASM
: asmline
= srcline
; break;
773 case T_INCL
: incline
= srcline
; break;
774 case T_MACRO
: mcrline
= srcline
; break;
779 dbuf_set_length(&dbuf_ib
, len
);
780 if (mcrline
++ == 0) {
788 fprintf(stderr
, "?ASxxxx-Internal-nxtline(objtyp)-Error.\n\n");
792 ib
= (char *)dbuf_c_str (&dbuf_ib
);
794 dbuf_append_str (&dbuf_ic
, ib
);
795 ic
= (char *)dbuf_c_str (&dbuf_ic
);
801 /*)Function VOID scanline()
803 * The function scanline() scans the assembler-source text line
804 * for a valid substitutable string. The only valid targets
805 * for substitution strings are strings beginning with a
806 * LETTER and containing any combination of DIGITS and LETTERS.
807 * If a valid target is found then the function replace() is
808 * called to search the ".define" substitution list. If there
809 * is some string substitution error (or error caused by a
810 * runaway recursion in replace) then scanline() returns a
811 * value of 1 else 0 is returned.
813 * If the assembler mnemonic .define, .undefine, .ifdef, or .ifndef
814 * is found then the function exits.
817 * int c temporary character value
818 * char id[] a string of maximum length NINPUT
821 * char ctype[] a character array which defines the
822 * type of character being processed.
823 * The index is the character
825 * int flevel IF-ELSE-ENDIF level
826 * char ib[] assembler-source text line
827 * char * ip pointer into the assembler-source text line
830 * int endline() aslex.c
831 * VOID getid() aslex.c
832 * VOID unget() aslex.c
833 * int replace() aslex.c
834 * int symeq() assym.c
837 * The assembler-source text line may be updated
838 * and a substitution made for the string id[].
851 while ((c
= endline()) != 0) {
852 if (ctype
[c
] & DIGIT
) {
853 while (ctype
[c
] & (LETTER
|DIGIT
)) c
= get();
856 if (ctype
[c
] & LETTER
) {
858 if (symeq(id
, ".define", 1)) {
861 if (symeq(id
, ".undefine", 1)) {
864 if (symeq(id
, ".ifdef", 1)) {
867 if (symeq(id
, ".ifndef", 1)) {
870 if (symeq(id
, ".iifdef", 1)) {
873 if (symeq(id
, ".iifndef", 1)) {
876 if (symeq(id
, ".if", 1) || symeq(id
, ".iif", 1)) {
879 if (symeq(id
, "def", 1)) {
882 if (symeq(id
, "ndef", 1)) {
895 /*)Function int replace(id)
897 * char * id a pointer to a string of
898 * maximum length NINPUT
900 * The function replace() scans the .define substitution list
901 * for a match to the string id[]. After the substitution is made
902 * to the assembler-source text line the current character position,
903 * ip, is set to the beginning of the substitution string. The
904 * function replace() returns a non-zero value if a substitution
905 * error is made else zero is returned.
907 * If the -bb option was specified and a listing file is open then
908 * the current assembler-source text line is listed before the
909 * substitution is made.
912 * char * p pointer to beginning of id
913 * char str[] temporary string
914 * char * frmt temporary listing format specifier
915 * struct def *dp pointer to .define definitions
918 * int a_bytes T line addressing size
919 * int bflag list source before substitution flag
920 * int cfile current input file number
921 * char ctype[] a character array which defines the
922 * type of character being processed.
923 * The index is the character
925 * char ib[] assembler-source text line
926 * char * ip pointer into the assembler-source text line
927 * FILE * lfp list output file handle
928 * int line current assembler source line number
929 * int lmode listing mode
930 * int lnlist LIST-NLIST state
931 * int pass assembler pass number
932 * int pflag paging flag
933 * int srcline source file line number
934 * int uflag -u, disable .list/.nlist processing
935 * int zflag case sensitivity flag
938 * int fprintf() c_library
939 * int getlnm() assubr.c
940 * VOID slew() aslist.c
941 * char * strcat() c_library
942 * char * strcpy() c_library
943 * int strlen() c_library
944 * int symeq() assym.c
947 * The assembler-source text line may be updated
948 * and a substitution made for the string id[].
960 * Check for .define substitution
964 if (dp
->d_dflag
&& symeq(id
, dp
->d_id
, zflag
)) {
965 if ((pass
== 2) && (bflag
== 2)) {
966 if (lfp
== NULL
|| lmode
== NLIST
) {
969 if ((lnlist
& LIST_SRC
) || (uflag
== 1)) {
971 * Get Correct Line Number
978 slew(lfp
, !pflag
&& ((lnlist
& LIST_PAG
) || (uflag
== 1)));
981 * Source listing only option.
985 case 2: frmt
= " %24s%5u %s\n"; break;
987 case 4: frmt
= " %32s%5u %s\n"; break;
989 fprintf(lfp
, frmt
, "", line
, ib
);
993 * Verify string space is available
995 if ((strlen(ib
) - strlen(id
) + strlen(dp
->d_define
)) > (NINPUT
*2 - 1)) {
999 * Beginning of Substitutable string
1001 p
= ip
- strlen(id
);
1003 * Make a copy of the string from the end of the
1004 * substitutable string to the end of the line.
1008 * Replace the substitutable string
1009 * with the new string and append
1010 * the tail of the original string.
1013 strcat(ib
, dp
->d_define
);
1024 /*)Function: int getlnm()
1026 * The function getlnm() returns the line number of the
1027 * originating assembler or include file.
1030 * struct asmf *asmt temporary pointer to the processing structure
1033 * struct asmf *asmc pointer to the current input processing structure
1034 * int asmline line number in current assembler file
1035 * int line line number
1041 * Sets line to the source file line number.
1050 if (asmc
->objtyp
== T_MACRO
) {
1052 while (asmt
!= NULL
) {
1053 switch (asmt
->objtyp
) {
1054 case T_ASM
: return(line
= asmline
);
1055 case T_INCL
: return(line
= asmt
->line
);
1056 default: asmt
= asmt
->next
; break;
1064 /*)Function int more()
1066 * The function more() scans the assembler-source text line
1067 * skipping white space (SPACES and TABS) and returns a (0)
1068 * if the end of the line or a comment delimiter (;) is found,
1069 * or a (1) if there are additional characters in the line.
1072 * int c next character from the
1073 * assembler-source text line
1079 * int getnb() aslex.c
1080 * VOID unget() aslex.c
1083 * use of getnb() and unget() updates the global pointer ip
1084 * the position in the current assembler-source text line
1094 return( (c
== '\0' || c
== ';') ? 0 : 1 );
1097 /*)Function char endline()
1099 * The function endline() scans the assembler-source text line
1100 * skipping white space (SPACES and TABS) and returns the next
1101 * character or a (0) if the end of the line is found or a
1102 * comment delimiter (;) is found.
1105 * int c next character from the
1106 * assembler-source text line
1112 * int getnb() aslex.c
1115 * use of getnb() updates the global pointer ip the
1116 * position in the current assembler-source text line
1125 return( (c
== '\0' || c
== ';') ? 0 : c
);
1128 /*)Function VOID chopcrlf(str)
1130 * char *str string to chop
1132 * The function chopcrlf() removes
1133 * LF, CR, LF/CR, or CR/LF from str.
1136 * char * p temporary string pointer
1137 * char c temporary character
1146 * All CR and LF characters removed.
1158 if ((c
== '\r') || (c
== '\n')) {