cosmetix
[urforth.git] / level1 / 36_str_ext_asm.f
blobb31aef366879c12338d364146088134df35cda40
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ;; UrForth level 1: self-hosting 32-bit Forth compiler
3 ;; Copyright (C) 2020 Ketmar Dark // Invisible Vector
4 ;; GPLv3 ONLY
5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9 ;; search the string specified by c-addr1 u1 for the string specified by c-addr2 u2.
10 ;; if flag is true, a match was found at c-addr3 with u3 characters remaining.
11 ;; if flag is false there was no match and c-addr3 is c-addr1 and u3 is u1.
12 code: SEARCH ( c-addr1 u1 c-addr2 u2 -- c-addr3 u3 flag )
13 push TOS
14 pushr EIP
15 mov ebx,[esp] ;; u2
16 cp ebx,0
17 jr z,.exitfound ;; this is what tester wants
18 jr le,.notfound
19 mov edx,[esp+8] ;; u1
20 cp edx,0
21 jr le,.notfound
22 mov edi,[esp+12] ;; c-addr1
23 add edx,edi ;; EDX is end address
24 .scanloop:
25 mov esi,[esp+4] ;; c-addr2
26 lodsb
27 mov ecx,edx
28 sub ecx,edi
29 jr be,.notfound
30 repnz scasb
31 jr nz,.notfound ;; no first char found
32 cmp ebx,1
33 jr z,.found ;; our pattern is one char, and it was found
34 mov ecx,ebx
35 dec ecx
36 mov eax,edx
37 sub eax,edi
38 cmp eax,ecx
39 jr c,.notfound ;; the rest is shorter than a pattern
40 push edi
41 repz cmpsb
42 pop edi
43 jr nz,.scanloop
44 .found:
45 dec edi ;; exact match found
46 sub edx,edi
47 mov [esp+12],edi ;; c-addr1
48 mov [esp+8],edx ;; u1
49 .exitfound:
50 mov TOS,1
51 jr @f
52 .notfound:
53 xor TOS,TOS
54 @@:
55 add esp,8
56 popr EIP
57 urnext
58 endcode
61 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
62 code: UPCASE-CHAR ( ch -- ch ) ;; koi8
63 ld eax,TOS ;; to avoid register dependency
64 call upcase_subr
65 movzx TOS,al
66 urnext
67 upcase_subr:
68 cp al,0x80
69 jr nc,.highascii
70 cp al,'a'
71 jr c,.done
72 cp al,'z'+1
73 jr nc,.done
74 sub al,32
75 .done:
76 ret
77 .highascii:
78 cp al,0xc0
79 jr nc,.highkoi
80 cp al,0xa3 ;; yo small
81 jr nz,.done
82 add al,0x10
83 ret
84 .highkoi:
85 or al,32
86 ret
87 endcode
89 code: UPCASE-STR ( addr count -- )
90 pop edi
91 cp TOS,0
92 jr le,.done
93 .convloop:
94 movzx eax,byte [edi]
95 ld ah,al
96 call upcase_subr
97 cp al,ah
98 jr z,@f
99 ld byte [edi],al
101 inc edi
102 dec ecx
103 jr nz,.convloop
104 .done:
105 pop TOS
106 urnext
107 endcode
109 code: LOCASE-CHAR ( ch -- ch )
110 ld eax,TOS ;; to avoid register dependency
111 call locase_subr
112 movzx TOS,al
113 urnext
114 locase_subr:
115 cp al,0x80
116 jr nc,.highascii
117 cp al,'A'
118 jr c,.done
119 cp al,'Z'+1
120 jr nc,.done
121 add al,32
122 .done:
124 .highascii:
125 cp al,0xe0
126 jr nc,.highkoi
127 cp al,0xb3 ;; yo big
128 jr nz,.done
129 sub al,0x10
131 .highkoi:
132 sub al,32
134 endcode
136 code: LOCASE-STR ( addr count -- )
137 pop edi
138 cp TOS,0
139 jr le,.done
140 .convloop:
141 movzx al,byte [edi]
142 ld ah,al
143 call locase_subr
144 cp al,ah
145 jr z,@f
146 ld byte [edi],al
148 inc edi
149 dec ecx
150 jr nz,.convloop
151 .done:
152 pop TOS
153 urnext
154 endcode
156 ;; this may be slower than the table solution, but it is smaller too
157 code: IS-ALPHA? ( ch -- flag )
158 ld eax,TOS ;; to avoid register dependency
159 xor TOS,TOS
160 cp al,0xc0
161 jr nc,.donealpha
162 cp al,0xa3
163 jr z,.donealpha
164 cp al,0xb3
165 jr z,.donealpha
166 cp al,'A'
167 jr c,.done
168 cp al,'Z'+1
169 jr c,.donealpha
170 cp al,'a'
171 jr c,.done
172 cp al,'z'+1
173 jr nc,.done
174 .donealpha:
175 inc TOS
176 .done:
177 urnext
178 endcode
181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
182 code: MEMEQU ( addr1 addr2 size -- equflag )
183 jecxz .zerolen
184 pop edi
185 xchg esi,[esp]
186 repz cmpsb
187 pop esi
188 setz cl
189 movzx TOS,cl
190 urnext
191 .zerolen:
192 add esp,4*2
193 inc TOS
194 urnext
195 endcode
197 code: MEMEQU-CI ( addr1 addr2 size -- equflag )
198 jecxz .zerolen
199 pop edi
200 xchg esi,[esp]
201 .cmploop:
202 lodsb
203 ld ah,byte [edi]
204 inc edi
205 ;; it may work
206 cp al,ah
207 jr nz,.trycase
208 .caseequ:
209 dec ecx
210 jr nz,.cmploop
211 ;; success
212 pop esi
213 ld TOS,1
214 urnext
216 .trycase:
217 xchg al,ah ;; AL:[addr2]; AH:[addr1]
218 call locase_subr
219 cmp al,ah
220 jr z,.caseequ
221 xchg al,ah ;; AL:[addr1]; AH:[addr2]
222 call locase_subr
223 cmp al,ah
224 jr z,.caseequ
225 ;; failure
226 pop esi
227 xor TOS,TOS
228 urnext
230 .zerolen:
231 add esp,4*2
232 inc TOS
233 urnext
234 endcode
237 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
238 ;; -1, 0 or 1
239 code: MEMCMP ( addr1 addr2 size -- n )
240 jecxz .zerolen
241 pop edi
242 xchg esi,[esp]
243 repz cmpsb
244 pop esi
245 jr z,.done
246 ld TOS,1
247 jr nc,.done
248 ld TOS,-1
249 .done:
250 urnext
251 .zerolen:
252 add esp,4*2
253 urnext
254 endcode
256 ;; -1, 0 or 1
257 code: MEMCMP-CI ( addr1 addr2 size -- n )
258 jecxz .zerolen
259 pop edi
260 xchg esi,[esp]
261 .cmploop:
262 lodsb
263 ld ah,byte [edi]
264 inc edi
265 ;; it may work
266 cp al,ah
267 jr nz,.trycase
268 .caseequ:
269 dec ecx
270 jr nz,.cmploop
271 ;; success
272 pop esi
273 urnext
275 .trycase:
276 xchg al,ah ;; AL:[addr2]; AH:[addr1]
277 call locase_subr
278 cmp al,ah
279 jr z,.caseequ
280 xchg al,ah ;; AL:[addr1]; AH:[addr2]
281 call locase_subr
282 cmp al,ah
283 jr z,.caseequ
284 ;; failure
285 pop esi
286 ld TOS,1
287 jr nc,.done
288 ld TOS,-1
289 .done:
290 urnext
292 .zerolen:
293 add esp,4*2
294 inc TOS
295 urnext
296 endcode
299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
300 : S= ( addr0 count0 addr1 count1 -- flag ) rot over - if drop 2drop false else memequ endif ;
301 : S=CI ( addr0 count0 addr1 count1 -- flag ) rot over - if drop 2drop false else memequ-ci endif ;
303 : COMPARE ( c-addr1 u1 c-addr2 u2 -- n ) rot 2dup 2>r umin memcmp ?dup ifnot 2r> swap ucmp else 2rdrop endif ;
304 : COMPARE-CI ( c-addr1 u1 c-addr2 u2 -- n ) rot 2dup 2>r umin memcmp-ci ?dup ifnot 2r> swap ucmp else 2rdrop endif ;
307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
308 ;; search the string specified by addr size for the char ch.
309 ;; if flag is true, a match was found at addr1 with size1 characters remaining.
310 ;; if flag is false there was no match and addr1 is addr and size1 is size.
311 ;; if size1 is zero, flag is always false
312 code: MEMCHR ( addr size ch -- addr1 size1 flag )
313 ld eax,TOS
314 ld TOS,[esp]
315 jecxz .zerolen
316 ld edi,[esp+4]
317 repnz scasb
318 jr nz,.notfound
319 dec edi
320 inc ecx
321 ld [esp],ecx
322 ld [esp+4],edi
323 ld TOS,1
324 urnext
325 .notfound:
326 xor TOS,TOS
327 .zerolen:
328 urnext
329 endcode
332 code: MEMCHR-CI ( addr size ch -- addr1 size1 flag )
333 ld eax,TOS
334 call locase_subr
335 ld ah,al
336 ld TOS,[esp]
337 jecxz .zerolen
338 ld edi,[esp+4]
339 .cmploop:
340 ld al,byte [edi]
341 cp al,ah
342 jr z,.found
343 call locase_subr
344 cp al,ah
345 jr z,.found
346 inc edi
347 dec ecx
348 jr nz,.cmploop
349 ;; not found
350 .zerolen:
351 urnext
352 .found:
353 ld [esp],TOS
354 ld [esp+4],edi
355 ld TOS,1
356 urnext
357 endcode