xog: slightly better debug output
[urforth.git] / level1 / 68_parse.f
blob52184a56aa0949f4765b9069c2eb8a04b74dec22
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ;; UrForth level 1: self-hosting 32-bit Forth compiler
3 ;; Copyright (C) 2020 Ketmar Dark // Invisible Vector
4 ;; GPLv3 ONLY
5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7 ;; yep, this is not thread-local
8 $value "(NUMBER-#-BASE)" 16
9 (hidden)
10 $value "(NUMBER-LEADING-SIGN?)" 1
11 (hidden)
14 ;; converts char to digit, without base checks
15 : (char->digit) ( ch -- digit true // false )
16 dup case
17 [char] 0 [char] 9 bounds-of [char] 0 - true endof
18 [char] A [char] Z bounds-of [char] A - 10 + true endof
19 [char] a [char] z bounds-of [char] a - 10 + true endof
20 drop false swap ;; so ch will be dropped
21 endcase
22 ; (hidden)
24 ;; converts the ascii character c (using base n1) to its binary equivalent n2,
25 ;; accompanied by a true flag. if the conversion is invalid, leaves only a false flag.
26 : digit ( c n1 -- n2 true // false )
27 dup +if
28 swap (char->digit) ifnot drop false ;; invalid digit
29 else 2dup <= if 2drop false else nip true endif
30 endif
31 else 2drop false endif ;; negative or zero base
34 : digit? ( ch base -- flag )
35 digit dup if nip endif
39 ;; convert the ASCII text beginning at addr with regard to BASE.
40 ;; the new value is accumulated into unsigned number u0, being left as u1.
41 ;; addr1 and count1 are unparsed part of the string
42 ;; will never read more than count bytes
43 ;; doesn't skip any spaces, doesn't parse prefixes and signs
44 ;; but skips '_'
45 : number-parse-simple ( addr count u0 -- addr1 count1 u1 )
46 over +if
47 >r ;; ( addr count | u )
48 ;; first must be a digit
49 over c@ base @ digit? if
50 ;; main loop
51 begin dup while
52 ;; ( addr count | u )
53 over c@
54 dup [char] _ = if drop ;; skip '_'
55 else ;; try digit
56 base @ digit ifnot break endif
57 r> base @ u* + >r
58 endif
59 /char
60 repeat
61 endif
63 endif
67 ;; convert the ASCII text beginning at addr with regard to BASE.
68 ;; the new value is accumulated into unsigned double number ud0, being left as ud1.
69 ;; addr1 and count1 are unparsed part of the string
70 ;; will never read more than count bytes
71 ;; doesn't skip any spaces, doesn't parse prefixes and signs
72 ;; but skips '_'
73 : dnumber-parse-simple ( addr count ud0lo ud0hi -- addr1 count1 ud1lo ud1hi )
74 2over nip +if
75 2>r ;; ( addr count | ud )
76 ;; first must be a digit
77 over c@ base @ digit? if
78 ;; main loop
79 begin dup while
80 ;; ( addr count | u )
81 over c@
82 dup [char] _ = if drop ;; skip '_'
83 else ;; try digit
84 base @ digit ifnot break endif
85 2r> base @ uds* rot u>d d+ 2>r
86 endif
87 /char
88 repeat
89 endif
90 2r>
91 endif
94 : >number ( ud1 c-addr1 u1 -- ud2 c-addr2 u2 )
95 2swap dnumber-parse-simple 2swap
99 ;; simple sigils
100 : number-parse-pfx-sigil ( addr count -- addr count newbase )
101 dup 1- -if 0 exit endif
102 over c@ case
103 [char] $ of 16 endof
104 [char] # of (number-#-base) dup 1 37 within ifnot drop 0 endif endof
105 [char] % of 2 endof
106 0 swap
107 endcase
108 dup if >r /char r> endif
111 : (number-base-char?) ( char checkdigit hexchar -- newbase // 0 )
112 swap if over base @ digit? if 2drop 0 exit endif endif
113 >r upcase-char
114 case
115 r> of 16 endof
116 [char] O of 8 endof
117 [char] B of 2 endof
118 [char] D of 10 endof
119 0 swap
120 endcase
123 ;; checkdigit: check if for a valid digit in a current base (not needed for "&...")
124 ;; hexchar: character for hexadecimal (they're different for "&..." and "0...")
125 : (number-parse-pfx-0&) ( addr count checkdigit hexchar -- addr count newbase )
126 2>r over 1+ c@ 2r> (number-base-char?)
127 dup if >r /2chars r> endif
128 ; (hidden)
130 : number-parse-pfx-0x& ( addr count -- addr count newbase )
131 dup 2 > ifnot 0 exit endif
132 over c@ case
133 [char] 0 of true [char] X endof
134 [char] & of false [char] H endof
135 0 swap
136 endcase
137 dup if (number-parse-pfx-0&) endif
140 ;; check suffixes
141 : number-parse-sfx ( addr count -- addr count newbase )
142 dup 2 > ifnot 0 exit endif
143 2dup + 1- c@ true [char] H (number-base-char?)
144 dup if swap 1- swap endif
148 ;; will return base according to prefix/suffix, and remove pfx/sfx from the string
149 ;; returns 0 if no special base change found
150 : number-parse-pfx-sfx ( addr count -- addr count newbase )
151 number-parse-pfx-sigil ?dup if exit endif
152 number-parse-pfx-0x& ?dup if exit endif
153 number-parse-sfx
156 : number-check-sign ( addr count -- addr count negflag )
157 false >r
158 dup +if
159 over c@ case
160 [char] - of /char rdrop true >r endof
161 [char] + of /char endof
162 endcase
163 endif
167 ;; parse prefix/suffix, and possible sign
168 ;; count is non-zero
169 ;; newbase can be zero if it isn't changed
170 : number-parse-pfx-sfx-sign ( addr count -- addr count negflag newbase )
171 (number-leading-sign?) if
172 number-check-sign >r number-parse-pfx-sfx r> swap
173 else ;; ans/2012 idiocity
174 number-parse-pfx-sfx >r number-check-sign r>
175 endif
176 ; (hidden)
179 ;; convert a character string left at addr to a signed number, using the current numeric base
180 : number ( addr count -- n true // false )
181 dup 0<= if 2drop false exit endif ;; check length
182 ;; ok, we have at least one valid char
183 number-parse-pfx-sfx-sign
184 base @ >r ?dup if base ! endif
185 nrot ;; ( negflag addr count )
186 ;; zero count means "nan"
187 dup 0> ifnot ;; no number chars besides a prefix
188 r> base ! ;; restore base
189 2drop drop false exit ;; exit with failure
190 endif
191 0 number-parse-simple r> base ! ;; ( negflag addr count u )
192 ;; if not fully parsed, it is nan
193 swap if drop 2drop false exit endif
194 ;; ( negflag addr u )
195 nip swap if negate endif
196 true
200 ;; compares ch with delimiter delim
201 ;; for BL delimiter, coerces all control chars to space
202 ;; will be called only when c is BL
203 : (parsebl=) ( delim ch -- flag ) nip bl <= ; (hidden)
205 ;; scans TIB, returns parsed word
206 ;; doesn't do any copying
207 ;; trailing delimiter is skipped
208 : (parse) ( c skip-leading-delim? -- addr count )
209 swap 0xff and ?dup ifnot bl endif swap ;; protection for zero delimiter
210 over bl = if ['] (parsebl=) else ['] = endif >r ;; get compare word cfa
211 ;; skip leading delimiters?
212 if begin dup tib-peekch r@ execute while tib-getch not-until endif
213 >in @ swap 0 ;; save starting position, push dummy char
214 begin ( delim oldch ) drop tib-getch dup while ( delim ch ) 2dup r@ execute until
215 rdrop >r drop ;; we need to subtract 1 if last char is not zero
216 >in @ over - r> if 1- 0 max endif swap tib @ + swap
217 \ endcr over .hex8 space dup . 2dup type space (tib-last-read-char@) . cr
218 ; (hidden)
221 : parse-skip-blanks ( -- ) begin tib-peekch ?dup while bl <= while tib-skipch repeat ;
222 : parse-skip-blanks-no-eol ( -- ) begin tib-peekch ?dup while dup bl > swap nl = or not-while tib-skipch repeat ;
223 \ this doesn't skip EOL itself
224 : parse-skip-to-eol ( -- ) (tib-last-read-char@) nl = ifnot begin tib-peekch ?dup while nl <> while tib-skipch repeat endif ;
226 ;; multiline comment
227 ;; (* .... *) -- opening eaten
228 : skip-comment-multiline ( -- ) begin tib-getch ?dup while [char] * = tib-peekch [char] ) = and until tib-skipch ;
230 ;; nested multiline comment
231 ;; (( .... )) -- opening eaten
232 : skip-comment-multiline-nested ( -- )
233 1 ;; current comment level
234 begin tib-getch ?dup while
235 8 lshift tib-peekch or case
236 0x2828 of tib-skipch 1+ endof ;; ((
237 0x2929 of tib-skipch 1- endof ;; ))
238 endcase
239 dup not-until
240 drop
244 : (parse-skip-comments-eol) ( -- )
245 tib-skipch ;; setup (tib-last-read-char)
246 parse-skip-to-eol
247 ; (hidden)
249 : (parse-skip-comments-multi) ( skipcfa -- )
250 tib-skipch tib-skipch ;; skip starting chars
251 execute-tail
252 ; (hidden)
254 : parse-skip-comments ( -- )
255 begin
256 parse-skip-blanks
257 false tib-peekch 8 lshift 1 tib-peekch-n or case
258 0x3b3b of (parse-skip-comments-eol) endof ;; ;;
259 0x2f2f of (parse-skip-comments-eol) endof ;; //
260 0x282a of ['] skip-comment-multiline (parse-skip-comments-multi) endof ;; (*
261 0x2828 of ['] skip-comment-multiline-nested (parse-skip-comments-multi) endof ;; ((
262 otherwise 2drop true
263 endcase
264 until
268 : (word-or-parse) ( c skip-leading-delim? -- here )
269 (parse)
270 1020 umin ;; truncate length
271 dup here ! ;; set counter
272 here cell+ swap move ;; copy string
273 here count + 0c! ;; put trailing zero byte
274 here
275 ; (hidden)
277 ;; WARNING! it is using "HERE", so "DP-TEMP" is in effect
278 ;; artificial word length limit: 1020 chars
279 ;; longer words will be properly scanned, but truncated
280 ;; adds trailing zero after the string (but doesn't include it in count)
281 ;; string is cell-counted
282 : word ( c -- wordhere ) true (word-or-parse) ;
283 : parse-to-here ( c -- wordhere ) false (word-or-parse) ;
285 : parse ( c -- addr count ) false (parse) ; ;; parse, don't skip leading delimiters
286 : parse-name ( -- addr count ) bl true (parse) ; ;; parse, skip leading delimiters