level1/68_parse.f

   1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
   2 ;; UrForth level 1: self-hosting 32-bit Forth compiler
   3 ;; Copyright (C) 2020 Ketmar Dark // Invisible Vector
   4 ;; GPLv3 ONLY
   5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
   6
   7 ;; yep, this is not thread-local
   8 $value "(NUMBER-#-BASE)" 16
   9 (hidden)
  10 $value "(NUMBER-LEADING-SIGN?)" 1
  11 (hidden)
  12
  13
  14 ;; converts char to digit, without base checks
  15 : (char->digit)  ( ch -- digit true // false )
  16   dup case
  17     [char] 0 [char] 9 bounds-of [char] 0 - true endof
  18     [char] A [char] Z bounds-of [char] A - 10 + true endof
  19     [char] a [char] z bounds-of [char] a - 10 + true endof
  20     drop false swap  ;; so ch will be dropped
  21   endcase
  22 ; (hidden)
  23
  24 ;; converts the ascii character c (using base n1) to its binary equivalent n2,
  25 ;; accompanied by a true flag. if the conversion is invalid, leaves only a false flag.
  26 : digit  ( c n1 -- n2 true // false )
  27   dup +if
  28     swap (char->digit) ifnot drop false  ;; invalid digit
  29     else 2dup <= if 2drop false else nip true endif
  30     endif
  31   else 2drop false endif  ;; negative or zero base
  32 ;
  33
  34 : digit?  ( ch base -- flag )
  35   digit dup if nip endif
  36 ;
  37
  38
  39 ;; convert the ASCII text beginning at addr with regard to BASE.
  40 ;; the new value is accumulated into unsigned number u0, being left as u1.
  41 ;; addr1 and count1 are unparsed part of the string
  42 ;; will never read more than count bytes
  43 ;; doesn't skip any spaces, doesn't parse prefixes and signs
  44 ;; but skips '_'
  45 : number-parse-simple  ( addr count u0 -- addr1 count1 u1 )
  46   over +if
  47     >r  ;; ( addr count | u )
  48     ;; first must be a digit
  49     over c@ base @ digit? if
  50       ;; main loop
  51       begin dup while
  52         ;; ( addr count | u )
  53         over c@
  54         dup [char] _ = if drop  ;; skip '_'
  55         else  ;; try digit
  56           base @ digit ifnot break endif
  57           r> base @ u* + >r
  58         endif
  59         /char
  60       repeat
  61     endif
  62     r>
  63   endif
  64 ;
  65
  66
  67 ;; convert the ASCII text beginning at addr with regard to BASE.
  68 ;; the new value is accumulated into unsigned double number ud0, being left as ud1.
  69 ;; addr1 and count1 are unparsed part of the string
  70 ;; will never read more than count bytes
  71 ;; doesn't skip any spaces, doesn't parse prefixes and signs
  72 ;; but skips '_'
  73 : dnumber-parse-simple  ( addr count ud0lo ud0hi -- addr1 count1 ud1lo ud1hi )
  74   2over nip +if
  75     2>r  ;; ( addr count | ud )
  76     ;; first must be a digit
  77     over c@ base @ digit? if
  78       ;; main loop
  79       begin dup while
  80         ;; ( addr count | u )
  81         over c@
  82         dup [char] _ = if drop  ;; skip '_'
  83         else  ;; try digit
  84           base @ digit ifnot break endif
  85           2r> base @ uds* rot u>d d+ 2>r
  86         endif
  87         /char
  88       repeat
  89     endif
  90     2r>
  91   endif
  92 ;
  93
  94 : >number  ( ud1 c-addr1 u1 -- ud2 c-addr2 u2 )
  95   2swap dnumber-parse-simple 2swap
  96 ;
  97
  98
  99 ;; simple sigils
 100 : number-parse-pfx-sigil  ( addr count -- addr count newbase )
 101   dup 1- -if 0 exit endif
 102   over c@ case
 103     [char] $ of 16 endof
 104     [char] # of (number-#-base) dup 1 37 within ifnot drop 0 endif endof
 105     [char] % of 2 endof
 106     0 swap
 107   endcase
 108   dup if >r /char r> endif
 109 ;
 110
 111 : (number-base-char?)  ( char checkdigit hexchar -- newbase // 0 )
 112   swap if over base @ digit? if 2drop 0 exit endif endif
 113   >r upcase-char
 114   case
 115     r> of 16 endof
 116     [char] O of 8 endof
 117     [char] B of 2 endof
 118     [char] D of 10 endof
 119     0 swap
 120   endcase
 121 ;
 122
 123 ;; checkdigit: check if for a valid digit in a current base (not needed for "&...")
 124 ;; hexchar: character for hexadecimal (they're different for "&..." and "0...")
 125 : (number-parse-pfx-0&)  ( addr count checkdigit hexchar -- addr count newbase )
 126   2>r over 1+ c@ 2r> (number-base-char?)
 127   dup if >r /2chars r> endif
 128 ; (hidden)
 129
 130 : number-parse-pfx-0x&  ( addr count -- addr count newbase )
 131   dup 2 > ifnot 0 exit endif
 132   over c@ case
 133     [char] 0 of true [char] X endof
 134     [char] & of false [char] H endof
 135     0 swap
 136   endcase
 137   dup if (number-parse-pfx-0&) endif
 138 ;
 139
 140 ;; check suffixes
 141 : number-parse-sfx  ( addr count -- addr count newbase )
 142   dup 2 > ifnot 0 exit endif
 143   2dup + 1- c@ true [char] H (number-base-char?)
 144   dup if swap 1- swap endif
 145 ;
 146
 147
 148 ;; will return base according to prefix/suffix, and remove pfx/sfx from the string
 149 ;; returns 0 if no special base change found
 150 : number-parse-pfx-sfx  ( addr count -- addr count newbase )
 151   number-parse-pfx-sigil ?dup if exit endif
 152   number-parse-pfx-0x& ?dup if exit endif
 153   number-parse-sfx
 154 ;
 155
 156 : number-check-sign  ( addr count -- addr count negflag )
 157   false >r
 158   dup +if
 159     over c@ case
 160       [char] - of /char rdrop true >r endof
 161       [char] + of /char endof
 162     endcase
 163   endif
 164   r>
 165 ;
 166
 167 ;; parse prefix/suffix, and possible sign
 168 ;; count is non-zero
 169 ;; newbase can be zero if it isn't changed
 170 : number-parse-pfx-sfx-sign  ( addr count -- addr count negflag newbase )
 171   (number-leading-sign?) if
 172     number-check-sign >r number-parse-pfx-sfx r> swap
 173   else  ;; ans/2012 idiocity
 174     number-parse-pfx-sfx >r number-check-sign r>
 175   endif
 176 ; (hidden)
 177
 178
 179 ;; convert a character string left at addr to a signed number, using the current numeric base
 180 : number  ( addr count -- n true // false )
 181   dup 0<= if 2drop false exit endif  ;; check length
 182   ;; ok, we have at least one valid char
 183   number-parse-pfx-sfx-sign
 184   base @ >r ?dup if base ! endif
 185   nrot  ;; ( negflag addr count )
 186   ;; zero count means "nan"
 187   dup 0> ifnot  ;; no number chars besides a prefix
 188     r> base !   ;; restore base
 189     2drop drop false exit  ;; exit with failure
 190   endif
 191   0 number-parse-simple r> base !  ;; ( negflag addr count u )
 192   ;; if not fully parsed, it is nan
 193   swap if drop 2drop false exit endif
 194   ;; ( negflag addr u )
 195   nip swap if negate endif
 196   true
 197 ;
 198
 199
 200 ;; compares ch with delimiter delim
 201 ;; for BL delimiter, coerces all control chars to space
 202 ;; will be called only when c is BL
 203 : (parsebl=)  ( delim ch -- flag )  nip bl <= ; (hidden)
 204
 205 ;; scans TIB, returns parsed word
 206 ;; doesn't do any copying
 207 ;; trailing delimiter is skipped
 208 : (parse)  ( c skip-leading-delim? -- addr count )
 209   swap 0xff and ?dup ifnot bl endif swap  ;; protection for zero delimiter
 210   over bl = if ['] (parsebl=) else ['] = endif >r  ;; get compare word cfa
 211   ;; skip leading delimiters?
 212   if begin dup tib-peekch r@ execute while tib-getch not-until endif
 213   >in @  swap 0  ;; save starting position, push dummy char
 214   begin ( delim oldch ) drop tib-getch dup while ( delim ch ) 2dup r@ execute until
 215   rdrop >r drop  ;; we need to subtract 1 if last char is not zero
 216   >in @ over - r> if 1- 0 max endif swap tib @ + swap
 217     \ endcr over .hex8 space dup . 2dup type space (tib-last-read-char@) . cr
 218 ; (hidden)
 219
 220
 221 : parse-skip-blanks  ( -- )  begin tib-peekch ?dup while bl <= while tib-skipch repeat ;
 222 : parse-skip-blanks-no-eol  ( -- )  begin tib-peekch ?dup while dup bl > swap nl = or not-while tib-skipch repeat ;
 223 \ this doesn't skip EOL itself
 224 : parse-skip-to-eol  ( -- )  (tib-last-read-char@) nl = ifnot begin tib-peekch ?dup while nl <> while tib-skipch repeat endif ;
 225
 226 ;; multiline comment
 227 ;; (* .... *) -- opening eaten
 228 : skip-comment-multiline  ( -- )  begin tib-getch ?dup while [char] * = tib-peekch [char] ) = and until tib-skipch ;
 229
 230 ;; nested multiline comment
 231 ;; (( .... )) -- opening eaten
 232 : skip-comment-multiline-nested  ( -- )
 233   1  ;; current comment level
 234   begin tib-getch ?dup while
 235     8 lshift tib-peekch or case
 236       0x2828 of tib-skipch 1+ endof  ;; ((
 237       0x2929 of tib-skipch 1- endof  ;; ))
 238     endcase
 239   dup not-until
 240   drop
 241 ;
 242
 243
 244 : (parse-skip-comments-eol)  ( -- )
 245   tib-skipch  ;; setup (tib-last-read-char)
 246   parse-skip-to-eol
 247 ; (hidden)
 248
 249 : (parse-skip-comments-multi)  ( skipcfa -- )
 250   tib-skipch tib-skipch  ;; skip starting chars
 251   execute-tail
 252 ; (hidden)
 253
 254 : parse-skip-comments  ( -- )
 255   begin
 256     parse-skip-blanks
 257     false  tib-peekch 8 lshift  1 tib-peekch-n or case
 258       0x3b3b of (parse-skip-comments-eol) endof  ;; ;;
 259       0x2f2f of (parse-skip-comments-eol) endof  ;; //
 260       0x282a of ['] skip-comment-multiline (parse-skip-comments-multi) endof  ;; (*
 261       0x2828 of ['] skip-comment-multiline-nested (parse-skip-comments-multi) endof  ;; ((
 262       otherwise 2drop true
 263     endcase
 264   until
 265 ;
 266
 267
 268 : (word-or-parse)  ( c skip-leading-delim? -- here )
 269   (parse)
 270   1020 umin             ;; truncate length
 271   dup here !            ;; set counter
 272   here cell+ swap move  ;; copy string
 273   here count + 0c!      ;; put trailing zero byte
 274   here
 275 ; (hidden)
 276
 277 ;; WARNING! it is using "HERE", so "DP-TEMP" is in effect
 278 ;; artificial word length limit: 1020 chars
 279 ;; longer words will be properly scanned, but truncated
 280 ;; adds trailing zero after the string (but doesn't include it in count)
 281 ;; string is cell-counted
 282 : word  ( c  -- wordhere )  true (word-or-parse) ;
 283 : parse-to-here  ( c -- wordhere )  false (word-or-parse) ;
 284
 285 : parse  ( c -- addr count )  false (parse) ;  ;; parse, don't skip leading delimiters
 286 : parse-name  ( -- addr count )  bl true (parse) ;  ;; parse, skip leading delimiters