URFORTH.me updates
[urforth.git] / level0 / urforth0_w_parse.asm
blobad1f40cc05f56bb35b6986d7bb79b0c90a6a8439
1 ;; Native x86 GNU/Linux Forth System, Direct Threaded Code
2 ;;
3 ;; Copyright (C) 2020 Ketmar Dark // Invisible Vector
4 ;;
5 ;; This program is free software: you can redistribute it and/or modify
6 ;; it under the terms of the GNU General Public License as published by
7 ;; the Free Software Foundation, version 3 of the License ONLY.
8 ;;
9 ;; This program is distributed in the hope that it will be useful,
10 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ;; GNU General Public License for more details.
14 ;; You should have received a copy of the GNU General Public License
15 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
17 ;; if set to true (non-zero), "PARSE-NAME" will understand comments without space delimiters
18 ;; (except a lone banana)
19 urword_value "PARSE-NAME-ADV-COMMENTS",parse_name_adv_comments,1
22 ;DIGIT ( c n1 -- n2 tf ok)
23 ; ( c n1 -- ff bad)
24 ;Converts the ascii character c (using base n1) to its binary equivalent
25 ;n2, accompanied by a true flag. If the conversion is invalid, leaves
26 ;only a false flag.
27 urword_code "DIGIT",digit
28 pop eax
29 ; TOS=base
30 ; EAX=char
31 jecxz .bad
32 test TOS,0x80000000
33 jr nz,.bad
34 cp TOS,36+1
35 jr nc,.bad
36 sub al,'0'
37 jr c,.bad
38 cp al,10
39 jr c,.check_base
40 sub al,7
41 jr c,.bad
42 cp al,10
43 jr c,.bad
44 ; upcase it
45 cp al,36
46 jr c,@f
47 cp al,42
48 jr c,.bad
49 sub al,32
50 @@:
51 .check_base:
52 cp al,cl ; ECX is TOS
53 jr nc,.bad
54 movzx eax,al
55 push eax
56 mov TOS,1
57 urnext
58 .bad:
59 xor TOS,TOS
60 urnext
61 urword_end
63 urword_forth "DIGIT?",digitq
64 ;; ( ch base -- flag )
65 UF digit dup
66 ur_if
67 UF nip
68 ur_endif
69 urword_end
72 ;; convert the ASCII text beginning at addr with regard to BASE.
73 ;; the new value is accumulated into unsigned number u0, being left as u1.
74 ;; addr1 and count1 are unparsed part of the string
75 ;; will never read more than count bytes
76 ;; doesn't skip any spaces, doesn't parse prefixes and signs
77 ;; but skips '_'
78 urword_forth "(NUMBER-PARSE-SIMPLE)",par_number_parse_simple
79 ;; ( addr count u0 -- addr1 count1 u1 )
80 UF over 0great
81 ur_ifnot
82 UF exit
83 ur_endif
84 UF rpush
85 ;; ( addr count | u )
86 ;; first must be a digit
87 UF over cpeek base @ digitq
88 ur_if
89 ;; main loop
90 ur_begin
91 UF dup
92 ur_while
93 ;; ( addr count | u )
94 UF over cpeek
95 ;; skip '_'
96 UF dup 95 equal
97 ur_if
98 UF drop
99 ur_else
100 ;; try digit
101 UF base @ digit
102 ur_ifnot
103 UF rpop exit ;; replace with BREAK
104 ur_endif
105 UF rpop base @ umul + rpush
106 ur_endif
107 ;; skip char
108 UF 1dec swap 1inc swap
109 ur_repeat
110 ur_endif
111 UF rpop
112 urword_end
114 ;; k8: non-conforming, because we cannot parse double numbers
115 urword_forth ">NUMBER",in_number
116 ;; ( ud1 c-addr1 u1 -- ud2 c-addr2 u2 )
117 UF rot drop rot
118 UF par_number_parse_simple
119 UF nrot 0 nrot
120 urword_end
123 urword_forth "NUMBER-PARSE-PFX-SIGIL",number_parse_sigil
124 ;; ( addr count -- addr count newbase )
125 ;; simple sigils
126 UF dup 1 great
127 ur_ifnot
128 UF 0 exit
129 ur_endif
130 ;; simple sigils
131 UF over cpeek
132 ;; ( addr count char )
133 UF dup 36 equal ;; dollar
134 ur_if
135 UF drop swap 1inc swap 1dec
136 UF 16 exit
137 ur_endif
138 UF dup 35 equal ;; hash
139 ur_if
140 UF drop swap 1inc swap 1dec
141 UF shit2012shit 1 and
142 ur_if
143 UF 10
144 ur_else
145 UF 16
146 ur_endif
147 UF exit
148 ur_endif
149 UF 37 equal ;; percent
150 ur_if
151 UF swap 1inc swap 1dec
152 UF 2 exit
153 ur_endif
154 UF 0
155 urword_end
157 urword_forth "NUMBER-PARSE-PFX-0X",number_parse_0x
158 ;; ( addr count -- addr count newbase )
159 UF dup 2 great
160 ur_ifnot
161 UF 0 exit
162 ur_endif
163 UF over cpeek 48 equal
164 ur_ifnot
165 UF 0 exit
166 ur_endif
167 UF over 1inc cpeek upcase_char
168 UF dup 88 equal ;; X
169 ur_if
170 ;; ( addr count char )
171 UF drop swap 2inc swap 2dec
172 UF 16 exit
173 ur_endif
174 UF dup 79 equal ;; O
175 ur_if
176 UF drop swap 2inc swap 2dec
177 UF 8 exit
178 ur_endif
179 UF dup 66 equal ;; B
180 ur_if
181 UF drop swap 2inc swap 2dec
182 UF 2 exit
183 ur_endif
184 UF 68 equal ;; D
185 ur_if
186 UF swap 2inc swap 2dec
187 UF 10 exit
188 ur_endif
189 UF 0
190 urword_end
192 urword_forth "NUMBER-PARSE-PFX-&",number_parse_pfxamp
193 ;; ( addr count -- addr count newbase )
194 UF dup 2 great
195 ur_ifnot
196 UF 0 exit
197 ur_endif
198 UF over cpeek 38 equal
199 ur_ifnot
200 UF 0 exit
201 ur_endif
202 UF over 1inc cpeek upcase_char
203 UF dup 72 equal ;; H
204 ur_if
205 ;; ( addr count char )
206 UF drop swap 2inc swap 2dec
207 UF 16 exit
208 ur_endif
209 UF dup 79 equal ;; O
210 ur_if
211 UF drop swap 2inc swap 2dec
212 UF 8 exit
213 ur_endif
214 UF dup 66 equal ;; B
215 ur_if
216 UF drop swap 2inc swap 2dec
217 UF 2 exit
218 ur_endif
219 UF 68 equal ;; D
220 ur_if
221 UF swap 2inc swap 2dec
222 UF 10 exit
223 ur_endif
224 UF 0
225 urword_end
228 urword_forth "(NUMBER-GOOD-BIN-DIGIT?)",par_number_isbindig
229 urword_hidden
230 ;; ( ch -- flag )
231 UF dup 95 equal
232 ur_if
233 UF drop true exit
234 ur_endif
235 UF 2 digitq
236 urword_end
238 urword_forth "(NUMBER-PARSE-BOOL?)",number_parse_boolq
239 urword_hidden
240 ;; ( addr count -- flag )
241 UF dup 0great
242 ur_ifnot
243 UF 2drop 0 exit
244 ur_endif
245 ;; first must be digit
246 UF over cpeek 2 digitq
247 ur_ifnot
248 UF 2drop 0 exit
249 ur_endif
250 UF true nrot
251 UF over + swap
252 ur_do
253 ;; ( okflag )
254 UF i cpeek par_number_isbindig
255 ur_ifnot
256 UF drop false leave
257 ur_endif
258 ur_loop
259 urword_end
261 urword_forth "NUMBER-PARSE-SFX",number_parse_sfx
262 ;; ( addr count -- addr count newbase )
263 ;; check suffixes
264 UF dup 2 great
265 ur_ifnot
266 UF 0 exit
267 ur_endif
268 UF 2dup + 1dec cpeek upcase_char
269 UF dup 72 equal ;; H
270 ur_if
271 UF drop 1dec
272 UF 16 exit
273 ur_endif
274 UF dup 79 equal ;; O
275 ur_if
276 UF drop 1dec
277 UF 8 exit
278 ur_endif
279 ;; the following suffix is allowed only if all digits are right
280 UF 66 equal ;; B
281 ur_if
282 UF 1dec
283 ;; ( addr count )
284 UF 2dup number_parse_boolq
285 ur_if
286 UF 2 exit
287 ur_endif
288 UF 1inc
289 ur_endif
290 UF 0
291 urword_end
294 ;; will return base according to prefix/suffix, and remove pfx/sfx from the string
295 ;; returns 0 if no special base change found
296 urword_forth "NUMBER-PARSE-PFX-SFX",number_parse_prefix_suffix
297 ;; ( addr count -- addr count newbase )
298 UF number_parse_sigil qdup
299 ur_if
300 UF exit
301 ur_endif
302 UF number_parse_0x qdup
303 ur_if
304 UF exit
305 ur_endif
306 UF number_parse_pfxamp qdup
307 ur_if
308 UF exit
309 ur_endif
310 UF number_parse_sfx
311 urword_end
314 urword_forth "NUMBER",number
315 ;; ( addr count -- n true // false )
316 ; convert a character string left at addr to a signed number, using the current numeric base.
317 ;; check length
318 UF dup 0lessequ
319 ur_if
320 UF 2drop 0 exit
321 ur_endif
322 ;; ok, we have at least one valid char
323 ;; check for leading minus (only if 2012 so-called-standard idiocity is not turned on)
324 UF shit2012shit 1 and
325 ur_if
326 UF 0
327 ur_else
328 UF over cpeek 45 equal
329 ur_if
330 UF swap 1inc swap 1dec 1
331 ur_else
332 UF 0
333 ur_endif
334 ur_endif
335 UF nrot
336 ;; ( negflag addr count )
337 UF base @ rpush ;; it can be changed by number prefix/suffix
338 UF number_parse_prefix_suffix
339 ;; done checking
340 UF qdup
341 ur_if
342 UF base !
343 ur_endif
344 ;; check for leading minus (only if 2012 so-called-standard idiocity is turned on)
345 UF shit2012shit 1 and
346 ur_if
347 UF dup 0great
348 ur_if
349 UF over cpeek 45 equal
350 ur_if
351 UF swap 1inc swap 1dec
352 UF rot drop 1 nrot
353 ur_endif
354 ur_endif
355 ur_endif
356 ;; zero count means "nan"
357 UF dup 0great
358 ur_ifnot
359 ;; restore base
360 UF rpop base !
361 ;; exit with failure
362 UF 2drop drop 0 exit
363 ur_endif
364 ;; ( negflag addr count | oldbase )
365 UF 0 par_number_parse_simple
366 ;; ( negflag addr count u | oldbase )
367 ;; restore base
368 UF rpop base !
369 ;; ( negflag addr count u )
370 ;; if not fully parsed, it is nan
371 UF swap
372 ;; ( negflag addr u count )
373 ur_if
374 ;; exit with failure
375 UF drop 2drop 0 exit
376 ur_endif
377 ;; ( negflag addr u )
378 UF nip swap
379 ur_if
380 UF negate
381 ur_endif
382 ;; success
383 UF 1
384 urword_end
387 ;; scans TIB, returns parsed word
388 ;; doesn't do any copying
389 ;; trailing delimiter is skipped
390 ;; HACK: sets (WORD-LAST-DELIMITER)
391 ;; this is so "comment-to-eol" could work
392 urword_code "(WORD)",par_word
393 urword_hidden
394 ;; ( c skip-leading-delim? -- addr count )
395 ; reset last delimiter (it will be set later)
396 mov edx,[fvar_tibsize_data]
397 sub edx,[fvar_inptr_data]
398 jc .noinput_drop
399 mov edi,[fvar_tib_data]
400 add edi,[fvar_inptr_data]
401 ; do we need to skip leading delimiters?
402 jecxz .noskipdel
403 pop TOS ; get char in TOS
404 ; TOS=char
405 ; EDI=tibptr
406 ; EDX=tibleft
407 ; skip leading delimiters
408 .skipdel_loop:
409 or edx,edx
410 jz .noinput
411 call .cmp_cl_memedi
412 jz .skipchar
413 or al,al
414 jz .noinput
415 jmp .startword
416 .skipchar:
417 call .count_lines
418 inc edi
419 dec edx
420 jmp .skipdel_loop
422 .noskipdel:
423 pop TOS ; get char in TOS
425 .startword:
426 ; remember current position
427 push edi
428 ; skip until delimiter
429 .collect_loop:
430 or edx,edx
431 jz .done_noadv
432 ; count lines here, so we won't have to call it on word completion
433 call .count_lines
434 call .cmp_cl_memedi
435 jz .done
436 or al,al
437 jz .done_noadv
438 inc edi
439 dec edx
440 jmp .collect_loop
442 .done:
443 ; skip delimiter
444 or edx,edx
445 jr z,.done_noadv
446 cmp byte [edi],0
447 jr z,.done_noadv
448 dec edx
449 movzx eax,byte [edi]
450 ld [fvar_par_last_read_char_data],eax
451 .done_noadv:
452 ; the word is never empty here
453 ; stack: word start
454 ; EDI: word end (after the last char, at a delimiter)
455 ; EDX=tibleft
456 ; fix inptr
457 mov eax,[fvar_tibsize_data]
458 sub eax,edx
459 mov [fvar_inptr_data],eax
460 ; calc and store counter
461 pop ecx ; start address
462 mov eax,edi ; EAX=end address
463 sub eax,ecx ; EAX=length
464 ; truncate length
465 ld edx,1020
466 cp eax,edx
467 cmovnc eax,edx
468 ; ECX=start address
469 ; EDI=end address
470 ; EAX=length
471 ; copy bytes
472 push TOS
473 ld TOS,eax
474 urnext
476 .noinput_drop:
477 add esp,4 ; drop char, as we didn't poped it yet
478 .noinput:
479 ; TOS=char
480 ; EDX=tibleft
481 mov eax,[fvar_tibsize_data]
482 sub eax,edx
483 ld edx,0
484 cmovc eax,edx
485 mov [fvar_inptr_data],eax
486 ; push current tib position, and zero length
487 add eax,[fvar_tib_data]
488 push eax
489 xor TOS,TOS
490 urnext
492 ; zero flag: equality
493 ; al: byte at [edi]
494 .cmp_cl_memedi:
495 movzx eax,byte [edi]
496 ld [fvar_par_last_read_char_data],eax
497 cmp al,1
498 jnc .cmp_cl_memedi_nonzero
500 .cmp_cl_memedi_nonzero:
501 cmp cl,32
502 jnz .cmp_cl_memedi_ok
503 ; coerce to space
504 cmp al,32
505 jnc .cmp_cl_memedi_ok
506 mov al,32
507 .cmp_cl_memedi_ok:
508 cmp al,cl
511 .count_lines:
512 cp byte [edi],10
513 jr nz,@f
514 cp dword [fvar_tiblineno_data],0
515 jr z,@f
516 inc dword [fvar_tiblineno_data]
519 urword_end
521 ;; this is a leftover from '"WORD" is always using real here'
522 ;urword_forth "WORD-HERE",word_here
523 ; UF dp @
524 ;urword_end
525 urword_alias "WORD-HERE",word_here,here
527 urword_forth "(WORD-OR-PARSE)",par_word_or_parse
528 urword_hidden
529 ;; ( c skip-leading-delim? -- wordhere )
530 UF par_word
531 UF 1020 umin ;; truncate length
532 UF dup word_here ! ;; set counter
533 UF word_here cellinc swap move ;; copy string
534 UF word_here count + 0cpoke ;; put trailing zero byte
535 UF word_here
536 urword_end
538 ;; HACK: sets (WORD-LAST-DELIMITER)
539 ;; this is so "comment-to-eol" could work
540 ;; WARNING! it is using "HERE", so "DP-TEMP" is in effect
541 ;; artificial word length limit: 1020 chars
542 ;; longer words will be properly scanned, but truncated
543 ;; adds trailing zero after the string (but doesn't include it in count)
544 ;; string is cell-counted
545 urword_forth "WORD",word_obsolete
546 ;; ( c -- wordhere )
547 UF 1 par_word_or_parse
548 urword_end
550 urword_forth "PARSE-TO-HERE",parse_to_here
551 ;; ( c -- wordhere )
552 UF 0 par_word_or_parse
553 urword_end
555 urword_forth "PARSE",parse
556 ;; ( c -- addr count )
557 UF 0 par_word ;; parse, don't skip leading delimiters
558 urword_end
560 urword_forth "PARSE-SKIP-BLANKS",par_parse_skip_blanks
561 ;; ( -- )
562 ur_begin
563 UF tib_peekch qdup
564 ur_while
565 UF 32 great
566 ur_if
567 UF exit
568 ur_endif
569 UF tib_getch drop
570 ur_repeat
571 urword_end
573 urword_forth "PARSE-SKIP-BLANKS-NO-EOL",par_parse_skip_blanks_no_eol
574 ;; ( -- )
575 ur_begin
576 UF tib_peekch qdup
577 ur_while
578 UF dup 32 great
579 ur_if
580 UF drop exit
581 ur_endif
582 UF dup 13 equal swap 10 equal or
583 ur_if
584 UF exit
585 ur_endif
586 UF tib_getch drop
587 ur_repeat
588 urword_end
590 urword_forth "PARSE-SKIP-BLANKS-EX",par_parse_skip_blanks_ex
591 ;; ( skipeol-flag -- )
592 ur_if
593 UF par_parse_skip_blanks
594 ur_else
595 UF par_parse_skip_blanks_no_eol
596 ur_endif
597 urword_end
600 ;; simple heuristic: if the word ends with ")", try to find it in current vocabs
601 ;; as our searching is very fast, and this word is not invoked that often, it is ok
602 urword_forth "(PARSE-GOOD-COMMENT?)",par_parse_good_commentq
603 ;; ( addr count -- addr count flag )
604 if 0
605 UF 2dup endcr pardottype "CHECKING: <" type pardottype ">: "
606 UF 2dup wfind_str dup
607 UF dup dot cr
608 ur_if
609 UF nip
610 ur_endif
611 UF not
612 else
613 UF true
614 end if
615 urword_end
617 ;; sorry for this huge mess
618 urword_forth "(PARSE-NAME-EX)",par_parse_name_ex
619 urword_hidden
620 ;; ( -- addr count 1 // 0 )
621 UF par_tibstate_rpush
622 UF bl 1 par_word
623 if 0
624 ;; if word length is enough for normal comment processing, do it
625 UF dup 1 great
626 ur_ifnot
627 ;; empty word, or one-char word
628 UF par_tibstate_rdrop true exit
629 ur_endif
630 ;; check for single-char backslash
631 UF over cpeek 92 equal
632 ur_if
633 UF par_parse_good_commentq
634 ur_if
635 UF 2drop par_tibstate_rpop
636 UF par_parse_skip_blanks
637 ;; skip word char
638 UF tib_getch drop
639 UF comment_toeol
640 UF false exit
641 ur_else
642 UF true exit
643 ur_endif
644 ur_endif
645 end if
646 ;; check for double-char comments
647 UF dup 2 great
648 ur_ifnot
649 UF par_tibstate_rdrop true exit
650 ur_endif
651 ;; check two chars at once
652 UF over wpeek
653 ;; two semicolons or two slashes
654 UF dup 0x3b3b equal over 0x2f2f equal or
655 ur_if
656 UF drop
657 UF par_parse_good_commentq
658 ur_if
659 UF 2drop par_tibstate_rpop
660 UF par_parse_skip_blanks
661 ;; skip word char
662 UF tib_getch drop
663 UF comment_toeol
664 UF false exit
665 ur_else
666 UF true exit
667 ur_endif
668 ur_endif
669 ;; (* and (+
670 UF dup 0x2a28 equal over 0x2b28 equal or
671 ur_if
672 ;; UF nrot 2drop par_tibstate_rpop
673 UF nrot par_parse_good_commentq
674 ur_if
675 UF 2drop par_tibstate_rpop
676 ;; ( 2chars )
677 ;; skip to the word
678 UF par_parse_skip_blanks
679 ;; skip two word chars
680 UF tib_getch tib_getch 2drop
681 UF 0x2a28 equal
682 ur_if
683 UF comment_multiline
684 ur_else
685 UF comment_multiline_nested
686 ur_endif
687 UF false exit
688 ur_endif
689 ur_endif
690 ;; not a comment
691 UF drop par_tibstate_rdrop
692 UF true
693 urword_end
695 urword_forth "PARSE-NAME",parse_name
696 ;; ( -- addr count )
697 UF bl 1 par_word ;; parse, skip leading delimiters
698 urword_end
700 urword_forth "PARSE-NAME-EX",parse_name_ex
701 ;; ( -- addr count )
702 UF parse_name_adv_comments
703 ur_if
704 ;; save current tib, so we will be able to reparse comments
705 ur_begin
706 UF par_parse_name_ex
707 ur_until
708 ur_else
709 UF parse_name
710 ur_endif
711 urword_end