l0: added "CELLS^", updated prebuilt binary
[urforth.git] / meta / meta-40-tc-compiler-20-create.f
blob68df9ad09b48fc3655baccde4a09877fd69ca39c
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ;; UrForth level 1: self-hosting 32-bit Forth compiler
3 ;; Metacompiler
4 ;; Copyright (C) 2020 Ketmar Dark // Invisible Vector
5 ;; GPLv3 ONLY
6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7 ;; wordlist management for metacompiler
8 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10 : tc-create; ( -- )
11 tc-here tc-latest-nfa tc-nfa->sfa tc-!
15 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16 ;; word header format:
17 ;; note than name hash is ALWAYS calculated with ASCII-uppercased name
18 ;; (actually, bit 5 is always reset for all bytes, because we don't need the
19 ;; exact uppercase, only something that resembles it)
20 ;; bfa points to next bfa or to 0 (this is "hash bucket pointer")
21 ;; before nfa, we have such "hidden" fields:
22 ;; dd dfa ; pointer to the debug data; can be 0 if debug info is missing
23 ;; dd sfa ; points *after* the last word byte
24 ;; dd bfa ; next word in hashtable bucket; it is always here, even if hashtable is turned off
25 ;; ; if there is no hashtable, this field is not used
26 ;; lfa:
27 ;; dd lfa ; previous word LFA or 0 (lfa links points here)
28 ;; dd namehash ; it is always here, and always calculated, even if hashtable is turned off
29 ;; nfa:
30 ;; dd flags-and-name-len ; see below
31 ;; db name ; no terminating zero or other "termination flag" here
32 ;; db namelen ; yes, name length again, so CFA->NFA can avoid guessing
33 ;; machine code follows
34 ;; here we usually have CALL to word handler
35 ;; 0xE8, 4-byte displacement
36 ;; (displacement is calculated from here)
38 ;; first word cell contains combined name length (low byte), argtype and flags (other bytes)
39 ;; layout:
40 ;; db namelen
41 ;; db argtype
42 ;; dw flags
44 ;; i.e. we have 16 bits for flags, and 256 possible argument types. why not.
46 ;; flags:
47 ;; bit 0: immediate
48 ;; bit 1: smudge
49 ;; bit 2: noreturn
50 ;; bit 3: hidden
51 ;; bit 4: codeblock
52 ;; bit 5: vocabulary
53 ;; bit 6: main scattered colon word (with "...")
54 ;; bit 7: macro (it *may* be inlined)
56 ;; argtype is the type of the argument that this word reads from the threaded code.
57 ;; possible argument types:
58 ;; 0: none
59 ;; 1: branch address
60 ;; 2: cell-size numeric literal
61 ;; 3: cell-counted string with terminating zero (not counted)
62 ;; 4: cfa of another word
63 ;; 5: cblock
64 ;; 6: vocid
65 ;; 7: byte-counted string with terminating zero (not counted)
66 ;; 8: unsigned byte
67 ;; 9: signed byte
68 ;; 10: unsigned word
69 ;; 11: signed word
71 ;; addr is NOT RVA!
72 : tc-(create-header-nocheck) ( addr count -- )
73 0 max tc-max-word-name-length min ;; sanitize length, just in case
74 tc-align-headers if
75 begin tc-here 3 and while 0 tc-c, 1 +to tc-align-headers-wasted repeat
76 endif
77 dup if tc-created-words-count 1+ to tc-created-words-count endif
78 \ endcr ." NEW HEADER AT 0x" tc-here .hex8 ." <" 2dup type ." >\n"
79 0 tc-, ;; allocate dfa
80 0 tc-, ;; allocate sfa
81 0 tc-, ;; allocate bfa (it will be patched later)
82 tc-here ;; remember HERE (it will become the new latest)
83 tc-latest-lfa tc-, ;; put lfa
84 tc-current tc-@ tc-! ;; update latest
85 dup if
86 ;; put name hash
87 2dup tc-str-name-hash-real-addr dup tc-,
88 ;; fix bfa
89 forth-hashtable-bits if
90 ;; ( addr count hash )
91 tc-current tc-@ tc-vocid-hashed? if
92 ;; fold hash
93 tc-name-hash-fold-mask cells \ endcr dup ." !!! 0x" .hex8 cr
94 ;; calc bucket address
95 tc-current tc-@ tc-vocid->htable +
96 ;; ( addr count bkptr )
97 ;; load old bfa link
98 dup tc-@
99 ;; ( addr count bkptr oldbfa )
100 ;; store current bfa address
101 tc-here tc-nfa->bfa rot tc-!
102 ;; update bfa
103 tc-here tc-nfa->bfa tc-!
104 else drop endif
105 else
106 drop ;; we don't really need any hash
107 endif
108 else
109 0 tc-, ;; namehash
110 endif
111 ;; remember HERE (we will need to fix some name fields later)
112 tc-here >r
113 ;; compile counter (we'll add flags and other things later)
114 ;; it is guaranteed that all fields except length are zero here
115 ;; (due to word header layout, and length check above)
116 dup tc-,
117 dup if
118 ;; copy parsed word to HERE (and allocate name bytes for it)
119 dup tc-n-allot tc->real swap move
120 ;; change the case of a new name?
121 tc-create-case? ?dup if r@ tc->real count rot +if upcase-str else locase-str endif endif
122 else 2drop endif
123 ;; put flags (ffa is 16 bits at nfa+2)
124 (wflag-smudge) ;; current_mode or
125 r@ tc-nfa->ffa tc-tfa! ;; it is safe to poke here, ffa is empty
126 tc-align-cfa tc-align-pfa or if ;; align CFA at 4 bytes; put incremented length byte just before CFA
127 0 tc-c,
128 r@ tc-c@ begin tc-here 3 and while 1+ 0 tc-c, 1 +to tc-align-cfa-wasted repeat
129 dup 255 u> ?abort" aligned name too long"
130 tc-here 1- tc-c!
131 else
132 r@ tc-c@ tc-c, ;; put length again (trailing length byte)
133 endif
134 r@ tc->real dup c@ swap 4+ swap ;; we are at CFA, fixup references ( addr count value forward -- )
135 \ endcr ." CFA AT 0x" tc-here .hex8 cr
136 tc-here tc-create-forth-label
137 rdrop ;; we don't need nfa address anymore
138 tc-create; ;; setup initial size, why not
141 ;; addr is NOT RVA!
142 : tc-(create-header) ( addr count -- )
143 ;; check length
144 dup 1 tc-max-word-name-length bounds? not-?abort" invalid word name"
145 ensure-forth-hashtable
146 ;; check for duplicate word?
147 \ true if
148 ;; look only in the current dictionary
149 2dup tc-current tc-@ tc-xcfind if
150 tc-cfa->nfa tc->real dup c@ swap cell+ swap type ." redefined" error-line. cr
151 endif
152 \ endif
153 tc-(create-header-nocheck)
157 : tc-create-header-named ( addr count -- )
158 tc-(create-header)
161 : tc-create-header ( -- ) \ word
162 parse-name tc-create-header-named
166 : (tc-xxx-hdr-str) ( dotype -- )
167 create ,
168 does> ( addr count pfa -- ) ( pfa: dotype )
169 @ nrot tc-(create-header) tc-smudge tc-compile-do-call
172 tc-rva-dovar (tc-xxx-hdr-str) tc-(variable-header-str)
173 tc-rva-doconst (tc-xxx-hdr-str) tc-(constant-header-str)
174 tc-rva-dovalue (tc-xxx-hdr-str) tc-(value-header-str)
175 tc-rva-dodefer (tc-xxx-hdr-str) tc-(defer-header-str)
178 0 value tc-(ua-rva)
180 : tc-get-ua-rva ( offs -- rva-addr )
181 tc-(ua-rva) ?dup ifnot
182 " ua_default_values" asmx86:asm-Get-Label 1 <> ?abort" ua_default_values must be defined here"
183 dup to tc-(ua-rva)
184 endif
189 ;; uservar is:
190 ;; dw ua_offset
191 ;; initvalues must be put to userarea at the given offset
192 : tc-(uservar-header-str) ( size addr count -- ua_offset )
193 rot dup 1 < ?abort" invalid uservar size"
194 tc-userarea-used over + tc-#userarea u> ?abort" too many uservars"
196 tc-(create-header)
197 tc-rva-douservar tc-compile-do-call
198 ;; put offset
199 tc-userarea-used dup tc-,
200 r> over + to tc-userarea-used
201 tc-create; tc-smudge
204 : tc-(userval-allot) ( n -- )
205 dup 0< ?abort" userallot must not be negative"
206 dup tc-userarea-used + tc-#userarea u> ?abort" too many uservars"
207 tc-userarea-used tc-get-ua-rva tc->real over erase
208 tc-userarea-used + to tc-userarea-used
212 ;; this a normal forth code; the magic is in its first word
213 : tc-(vocab-header-str) ( addr count -- )
214 tc-(create-header) tc-smudge tc-vocab
215 tc-rva-doforth tc-compile-do-call
216 ;; put vocab does
217 ;; " (VOCAB-DOES-CODE)" tc-compile,-(str)
218 tc-compile (VOCAB-DOES-CODE)