xog: slightly better debug output
[urforth.git] / level1 / 30_count_str.f
blob8fd0d80782b519d7c38cd59a898867d8b6891a6a
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ;; UrForth level 1: self-hosting 32-bit Forth compiler
3 ;; Copyright (C) 2020 Ketmar Dark // Invisible Vector
4 ;; GPLv3 ONLY
5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7 code: NULLSTRING ( addr 0 -- )
8 push TOS
9 push nullstring_data
10 xor TOS,TOS
11 urnext
12 nullstring_data: dw 0,0
13 endcode
16 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17 ;; length of asciiz string
18 code: ZCOUNT ( addr -- addr count )
19 push TOS
20 mov edi,TOS
21 xor al,al
22 cp [edi],al
23 jr nz,@f
24 xor TOS,TOS
25 urnext
26 @@:
27 mov edx,-1
28 mov ecx,edx
29 repnz scasb
30 sub edx,ecx
31 mov ecx,edx
32 dec ecx
33 urnext
34 endcode
36 : zcount-only ( addr -- count )
37 zcount nip
41 alias @ COUNT-ONLY ( addr -- count )
43 code: COUNT ( addr -- addr+4 count )
44 ;;UF dup count_only swap cellinc swap exit
45 ld eax,[TOS]
46 add TOS,4
47 push TOS
48 ld TOS,eax
49 urnext
50 endcode
53 alias W@ WCOUNT-ONLY ( addr -- count )
55 code: WCOUNT ( addr -- addr+2 count )
56 movzx eax,word [TOS]
57 add TOS,2
58 push TOS
59 mov TOS,eax
60 urnext
61 endcode
63 alias C@ CCOUNT-ONLY ( addr -- count )
64 alias C@ BCOUNT-ONLY ( addr -- count )
66 code: CCOUNT ( addr -- addr+1 count )
67 movzx eax,byte [TOS]
68 inc TOS
69 push TOS
70 mov TOS,eax
71 urnext
72 endcode
74 alias CCOUNT BCOUNT
77 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
78 ;; adjust the character string at c-addr1 by n characters.
79 ;; the resulting character string, specified by c-addr2 u2,
80 ;; begins at c-addr1 plus n characters and is u1 minus n characters long.
81 ;; doesn't check length, allows negative n.
82 : /STRING ( c-addr1 u1 n -- c-addr2 u2 )
83 dup >r - swap r> + swap
86 ;; checks length, doesn't strip anything from an empty string
87 : /CHAR ( c-addr1 u1 -- c-addr+1 u1-1 )
88 1- dup -if drop 0 else swap 1+ swap endif
91 ;; checks length, doesn't strip anything from an empty string
92 : /2CHARS ( c-addr1 u1 -- c-addr+2 u1-2 )
93 2- dup -if drop 0 else swap 2+ swap endif
97 ;; if n1 is greater than zero, n2 is equal to n1 less the number of
98 ;; spaces at the end of the character string specified by c-addr n1.
99 ;; if n1 is zero or the entire string consists of spaces, n2 is zero.
100 : -TRAILING ( c-addr n1 -- c-addr n2 )
101 0 max begin dup while 2dup + 1- c@ bl <= while 1- repeat
104 ;; strips leading blanks
105 : -LEADING ( c-addr1 n1 -- c-addr2 n2 )
106 0 max begin dup while over c@ bl <= while /char repeat
110 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
111 ;; converts some escape codes in-place
112 ;; used for `."` and `"`
113 ;; the resulting string is never bigger than a source one
114 ;; this will not preserve the trailing zero byte
115 code: STR-UNESCAPE ( addr count -- addr count )
116 jecxz .quit
117 ld edi,[esp]
118 push ecx
120 .scanloop:
121 ld al,92
122 repnz scasb
123 jecxz .done ;; nope
124 jr nz,.done
125 ;; edi is after backslash
126 ;; ecx is number of chars left after backslash
127 ;; found backslash, check next char
128 movzx eax,byte [edi]
129 ld ah,al
130 or ah,32 ;; lowercase it
131 ;; '\r'?
132 cp ah,'r'
133 jr nz,@f
134 ld al,13
135 jr .replace_one
137 ;; '\n'?
138 cp ah,'n'
139 jr nz,@f
140 ld al,10
141 jr .replace_one
143 ;; '\t'?
144 cp ah,'t'
145 jr nz,@f
146 ld al,9
147 jr .replace_one
149 ;; '\b'? (bell)
150 cp ah,'b'
151 jr nz,@f
152 ld al,7
153 jr .replace_one
155 ;; '\e'? (escape)
156 cp ah,'e'
157 jr nz,@f
158 ld al,27
159 jr .replace_one
161 ;; '\z'? (zero)
162 cp ah,'z'
163 jr nz,@f
164 xor al,al
165 jr .replace_one
167 ;; '\`'? (double quote)
168 cp ah,'`'
169 jr nz,@f
170 ld al,'"' ;; '
171 jr .replace_one
173 ;; 'xHH'?
174 cp ah,'x'
175 jr z,.esc_hex
176 jr .replace_one
177 .loop_cont:
178 dec ecx
179 jr nz,.scanloop
181 .done:
182 pop ecx
183 .quit:
184 urnext
186 .replace_one:
187 ld [edi-1],al
188 dec dword [esp]
189 dec ecx
190 jecxz .done
191 ;; move
192 push esi
193 push edi
194 push ecx
195 ld esi,edi
196 inc edi
197 .domove:
198 xchg esi,edi
199 rep movsb
200 pop ecx
201 pop edi
202 pop esi
203 jr .scanloop
205 .esc_hex:
206 cp ecx,2
207 jr c,.loop_cont
208 movzx eax,byte [edi+1]
209 call .hexdigit
210 jr c,.loop_cont
211 ;; save original string position
212 push esi
213 ld esi,edi
214 ;; skip 'x'
215 inc edi ;; skip 'x'
216 dec ecx
217 dec dword [esp+4]
218 ;; skip first digit
219 inc edi
220 dec ecx
221 dec dword [esp+4]
222 jecxz .esc_hex_done
223 ld ah,al
224 ld al,[edi]
225 call .hexdigit
226 jr nc,@f
227 ld al,ah
228 jr .esc_hex_done
230 ;; combine two hex digits
231 shl ah,4
232 or al,ah
233 ;; skip second digit
234 inc edi
235 dec ecx
236 dec dword [esp+4]
237 .esc_hex_done:
238 ld [esi-1],al
239 jecxz .esc_hex_quit
240 ;; remove leftover chars
241 ;; ECX: chars left
242 ;; ESI: position after backslash
243 ;; EDI: rest position
244 ;; old ESI is on the stack
245 push esi ;; to be restored in EDI
246 push ecx
247 xchg esi,edi
248 rep movsb
249 pop ecx
250 pop edi ;; get back to backslash
251 pop esi ;; restore old ESI
252 jp .scanloop
254 .esc_hex_quit:
255 pop esi
256 jr .done
258 .hexdigit:
259 sub al,'0'
260 jr c,.hexdigit_done
261 cp al,10
263 jr nc,.hexdigit_done
264 sub al,7
265 jr c,.hexdigit_done
266 cp al,16
268 jr nc,.hexdigit_done
269 ;; maybe its lowercase?
270 cp al,42
271 jr c,.hexdigit_done
272 sub al,32
273 cp al,16
275 .hexdigit_done:
277 endcode